zsv 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +36 -0
- data/LICENSE +21 -0
- data/README.md +311 -0
- data/ext/zsv/common.h +34 -0
- data/ext/zsv/extconf.rb +137 -0
- data/ext/zsv/options.c +126 -0
- data/ext/zsv/options.h +31 -0
- data/ext/zsv/options_internal.h +8 -0
- data/ext/zsv/parser.c +300 -0
- data/ext/zsv/parser.h +62 -0
- data/ext/zsv/row.c +122 -0
- data/ext/zsv/row.h +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
- data/ext/zsv/zsv_ext.c +343 -0
- data/lib/zsv/version.rb +5 -0
- data/lib/zsv.rb +81 -0
- metadata +340 -0
|
@@ -0,0 +1,731 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Tai Chi Minh Ralph Eastwood (self), Matt Wong (Guarnerix Inc dba Liquidaty)
|
|
3
|
+
* All rights reserved
|
|
4
|
+
*
|
|
5
|
+
* This file is part of zsv/lib, distributed under the license defined at
|
|
6
|
+
* https://opensource.org/licenses/MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#include <stdio.h>
|
|
10
|
+
#include <stdlib.h>
|
|
11
|
+
#include <string.h>
|
|
12
|
+
#include <ctype.h>
|
|
13
|
+
#include <stdint.h> // uint32_t
|
|
14
|
+
|
|
15
|
+
#ifdef ZSV_EXTRAS
|
|
16
|
+
#include <time.h>
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
#include <zsv/utils/utf8.h>
|
|
20
|
+
#include <zsv/utils/compiler.h>
|
|
21
|
+
#include <zsv/utils/string.h>
|
|
22
|
+
|
|
23
|
+
/*
 * SIMD configuration. Depending on the compiler-provided feature macros this
 * section defines:
 *   VECTOR_BYTES    - number of bytes in one scan vector
 *   zsv_mask_t      - unsigned integer type with one bit per vector byte
 *   NEXT_BIT        - find-first-set builtin wide enough for zsv_mask_t
 *   movemask_pseudo - defined here only when an x86 intrinsic is selected
 *   ZSV_NO_AVX      - defined when AVX2 is not available
 */
#if !defined(__AVX2__) // -mavx2 compiler flag not present
#define ZSV_NO_AVX
#define zsv_mask_t uint16_t
#define VECTOR_BYTES 16
#define NEXT_BIT __builtin_ffs
#if defined(__AVX__)
#include <emmintrin.h>
#define movemask_pseudo(x) _mm_movemask_epi8((__m128i)x)
#endif
#elif defined(HAVE_AVX512)
#ifndef __AVX512BW__
#error AVX512 requested, but __AVX512BW__ macro not defined
#else
#include <immintrin.h>
#define VECTOR_BYTES 64
#define zsv_mask_t uint64_t
#define movemask_pseudo(x) _mm512_movepi8_mask((__m512i)x)
// the mask is 64 bits wide; `long` is only 32 bits on LLP64 targets (e.g.
// 64-bit Windows), so use the `long long` variant to cover every mask bit
#define NEXT_BIT __builtin_ffsll
#endif
#else // have avx2, not avx512
#include <immintrin.h>
#define VECTOR_BYTES 32
#define zsv_mask_t uint32_t
#define movemask_pseudo(x) _mm256_movemask_epi8((__m256i)x)
#define NEXT_BIT __builtin_ffs
#endif
|
|
57
|
+
|
|
58
|
+
typedef unsigned char zsv_uc_vector __attribute__((vector_size(VECTOR_BYTES)));
|
|
59
|
+
|
|
60
|
+
/*
 * Cells collected for the row currently being parsed.
 *   used      - number of entries stored in `cells`
 *   allocated - capacity of `cells`
 *   overflow  - number of cells that did not fit because used == allocated
 */
struct zsv_row {
  size_t used, allocated, overflow;
  struct zsv_cell *cells;
};
|
|
64
|
+
|
|
65
|
+
/*
 * Accumulator used to merge several header rows into one: for each column,
 * the values from successive rows are kept adjacent in a single buffer.
 */
struct collate_header {
  struct {
    unsigned char *buff; // heap buffer holding every collated cell back to back
    size_t used;         // number of bytes of `buff` in use
  } buff;
  size_t *lengths; // length PLUS 1 of each cell
  size_t column_count; // largest column count of any row appended so far
};
|
|
73
|
+
|
|
74
|
+
/*
 * Working state for the delimited-input scanner.
 * NOTE(review): the code that reads and writes these fields is not in this
 * file section (presumably zsv_scan_delim.c) -- confirm field semantics there.
 */
struct zsv_scan_delim_regs {
  size_t i;
  size_t bytes_chunk_end;
  size_t bytes_read;
  char delimiter;
  unsigned char c;
  char skip_next_delim;
  int quote;
  size_t mask_total_offset;
  zsv_mask_t mask; // one bit per byte of a scan vector (see zsv_mask_t)
  int mask_last_start;
  unsigned char location;
};
|
|
87
|
+
|
|
88
|
+
/*
 * Placeholder for fixed-width scanner state; currently holds only a dummy
 * member so that the struct is not empty.
 */
struct zsv_scan_fixed_regs {
  char xx; // to do
};
|
|
91
|
+
|
|
92
|
+
#ifdef ZSV_EXTRAS
|
|
93
|
+
#include <inttypes.h>
|
|
94
|
+
#include <sqlite3.h>
|
|
95
|
+
|
|
96
|
+
/*
 * Overwrite source attached to a scanner (ZSV_EXTRAS builds only): a data
 * record plus a pair of callbacks that both receive `ctx`.
 * NOTE(review): the callers of `next` / `close` are not in this section --
 * confirm the exact calling contract against them.
 */
struct zsv_overwrite {
  struct zsv_overwrite_data odata;
  void *ctx; // passed as first argument to `next` and `close`
  enum zsv_status (*next)(void *ctx, struct zsv_overwrite_data *odata);
  enum zsv_status (*close)(void *ctx);
};
|
|
102
|
+
#endif
|
|
103
|
+
|
|
104
|
+
/*
 * Complete parser state: input buffer, per-cell and per-row bookkeeping,
 * user options and callbacks, pull-parser state and error reporting hooks.
 */
struct zsv_scanner {
  char last;
  struct {
    unsigned char *buff; // provided by caller
    size_t size;         // provided by caller
  } buff;

  size_t cell_start;
  unsigned char quoted : 7; // bitfield of ZSV_PARSER_QUOTE_XXX flags
  unsigned char buffer_exceeded : 1;

  unsigned char waiting_for_end : 1;
  unsigned char checked_bom : 1;
  unsigned char free_buff : 1;
  unsigned char finished : 1;
  unsigned char had_bom : 1;
  unsigned char abort : 1;     // when set, row_dl() returns zsv_status_cancelled
  unsigned char have_cell : 1; // set by cell_dl(), cleared by row_dl()
  unsigned char started : 1;

  size_t quote_close_position; // offset of the closing quote within the current cell
  struct zsv_opts opts;

  size_t row_start;
  struct zsv_row row;

  size_t scanned_length;
  size_t cum_scanned_length;
  size_t partial_row_length;

  size_t (*read)(void *buff, size_t n, size_t size, void *in);
  void *in;

  size_t (*filter)(void *ctx, unsigned char *buff, size_t bytes_read);
  void *filter_ctx;

  size_t buffer_end;
  size_t old_bytes_read; // only non-zero if we must shift upon next parse_more()

  const char *insert_string;

  size_t empty_header_rows;

  struct zsv_opts opts_orig;

// values for `mode`; zsv_scan() dispatches on them
#define ZSV_MODE_DELIM 0
#define ZSV_MODE_FIXED 1
#define ZSV_MODE_DELIM_PULL 2
  unsigned char mode;
  struct {
    unsigned *offsets; // 0-based position of each cell end. offset[0] = end of first cell
    unsigned count;    // number of offsets
  } fixed;

  struct collate_header *collate_header;
  size_t data_row_count; /* 0 = in header row; 1 = first data row */
  struct zsv_cell (*get_cell)(zsv_parser parser, size_t ix);

  struct {
    union {
      struct zsv_scan_delim_regs delim;
      struct zsv_scan_fixed_regs fixed;
    } *regs;
    enum zsv_status stat; // last status
    unsigned char *buff;
    size_t bytes_read;
    size_t row_used;
    unsigned char now;
  } pull;

  // error reporting: errprintf is called with errf as its first argument
  int (*errprintf)(void *ctx, const char *format, ...);
  void *errf;
  int (*errclose)(void *ctx);

#ifdef ZSV_EXTRAS
  struct {
    size_t cum_row_count; /* total number of rows read */
    time_t last_time;     /* last time from which to check seconds_interval */
    size_t max_rows;      /* max rows to read, including header row(s) */
  } progress;
  struct zsv_overwrite overwrite;
#endif
};
|
|
187
|
+
|
|
188
|
+
void collate_header_destroy(struct collate_header **chp) {
|
|
189
|
+
if (*chp) {
|
|
190
|
+
struct collate_header *ch = *chp;
|
|
191
|
+
free(ch->buff.buff);
|
|
192
|
+
free(ch->lengths);
|
|
193
|
+
free(ch);
|
|
194
|
+
*chp = NULL;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
/* collate_header_append(): return err */
|
|
199
|
+
static int collate_header_append(struct zsv_scanner *scanner, struct collate_header **chp) {
|
|
200
|
+
if (!*chp) {
|
|
201
|
+
if ((*chp = calloc(1, sizeof(struct collate_header))))
|
|
202
|
+
(*chp)->lengths = calloc(scanner->row.allocated, sizeof(*(*chp)->lengths));
|
|
203
|
+
if (!(*chp) || !(*chp)->lengths) {
|
|
204
|
+
free(*chp);
|
|
205
|
+
scanner->errprintf(scanner->errf, "Out of memory!\n");
|
|
206
|
+
return -1;
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
struct collate_header *ch = *chp;
|
|
210
|
+
size_t this_row_size = 0;
|
|
211
|
+
size_t column_count = zsv_cell_count(scanner);
|
|
212
|
+
for (size_t i = 0, j = column_count; i < j; i++) {
|
|
213
|
+
struct zsv_cell c = zsv_get_cell_1(scanner, i);
|
|
214
|
+
if (c.len)
|
|
215
|
+
this_row_size += c.len + 1; // +1: terminating null or delim
|
|
216
|
+
}
|
|
217
|
+
size_t new_row_size = ch->buff.used + this_row_size;
|
|
218
|
+
unsigned char *new_row = realloc(ch->buff.buff, new_row_size);
|
|
219
|
+
if (!new_row) {
|
|
220
|
+
scanner->errprintf(scanner->errf, "Out of memory!\n");
|
|
221
|
+
return -1;
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// now: splice the new row into the old row, starting with the last cell
|
|
225
|
+
// e.g. prior row = A1.B1.C1.
|
|
226
|
+
// this row = A2.B2.C2.
|
|
227
|
+
// new_row = A1.B1.C1..........
|
|
228
|
+
// starting with last cell in this row, move the old data, then splice new:
|
|
229
|
+
// new_row = A1.B1.C1.......C2.
|
|
230
|
+
// new_row = A1.B1.C1....C1 C2.
|
|
231
|
+
// new_row = A1.B1.C1.B2.C1 C2.
|
|
232
|
+
// new_row = A1.B1.B1 B2.C1 C2.
|
|
233
|
+
// new_row = A1.A2.B1 B2.C1 C2.
|
|
234
|
+
// new_row = A1 A2.B1 B2.C1 C2.
|
|
235
|
+
|
|
236
|
+
size_t new_row_end = ch->buff.used + this_row_size;
|
|
237
|
+
size_t old_row_end = ch->buff.used;
|
|
238
|
+
ch->buff.used += this_row_size;
|
|
239
|
+
ch->buff.buff = new_row;
|
|
240
|
+
for (size_t i = column_count; i > 0; i--) {
|
|
241
|
+
struct zsv_cell c = zsv_get_cell_1(scanner, i - 1);
|
|
242
|
+
// copy new row's cell value to end
|
|
243
|
+
if (c.len) {
|
|
244
|
+
memcpy(new_row + new_row_end - c.len - 1, c.str, c.len);
|
|
245
|
+
new_row[new_row_end - 1] = ' ';
|
|
246
|
+
new_row_end = new_row_end - c.len - 1;
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
// move prior cell value
|
|
250
|
+
size_t old_cell_len = ch->lengths[i - 1]; // old_cell_len includes delim
|
|
251
|
+
if (old_cell_len) {
|
|
252
|
+
// need memmove, not memcpy
|
|
253
|
+
memmove(new_row + new_row_end - old_cell_len, new_row + old_row_end - old_cell_len, old_cell_len);
|
|
254
|
+
old_row_end -= old_cell_len;
|
|
255
|
+
new_row_end -= old_cell_len;
|
|
256
|
+
}
|
|
257
|
+
if (c.len)
|
|
258
|
+
ch->lengths[i - 1] += c.len + 1;
|
|
259
|
+
}
|
|
260
|
+
if (column_count > ch->column_count)
|
|
261
|
+
ch->column_count = column_count;
|
|
262
|
+
return 0;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
__attribute__((always_inline)) static inline void zsv_clear_cell(struct zsv_scanner *scanner) {
|
|
266
|
+
scanner->quoted = 0;
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
// always_inline has a noticeable impact. do not remove without benchmarking!
|
|
270
|
+
__attribute__((always_inline)) static inline void cell_dl(struct zsv_scanner *scanner, unsigned char *s, size_t n) {
|
|
271
|
+
// handle quoting
|
|
272
|
+
if (VERY_LIKELY(!scanner->buffer_exceeded)) {
|
|
273
|
+
if (UNLIKELY(scanner->quoted > 0)) {
|
|
274
|
+
if (LIKELY(scanner->quote_close_position + 1 == n)) {
|
|
275
|
+
if (LIKELY((scanner->quoted & ZSV_PARSER_QUOTE_EMBEDDED) == 0)) {
|
|
276
|
+
// this is the easy and usual case: no embedded double-quotes
|
|
277
|
+
// just remove surrounding quotes from content
|
|
278
|
+
s++;
|
|
279
|
+
n -= 2;
|
|
280
|
+
} else { // embedded dbl-quotes to remove
|
|
281
|
+
s++;
|
|
282
|
+
n--;
|
|
283
|
+
// remove dbl-quotes. TO DO: consider adding option to skip this
|
|
284
|
+
for (size_t i = 0; i + 1 < n; i++) {
|
|
285
|
+
if (s[i] == '"' && s[i + 1] == '"') {
|
|
286
|
+
if (n > i + 2)
|
|
287
|
+
memmove(s + i + 1, s + i + 2, n - i - 2);
|
|
288
|
+
n--;
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
n--;
|
|
292
|
+
}
|
|
293
|
+
} else {
|
|
294
|
+
if (scanner->quote_close_position) {
|
|
295
|
+
// the first char was a quote, and we have content after the closing quote
|
|
296
|
+
// the solution below is a generalized on that will work
|
|
297
|
+
// for the easy and usual case, but by handling separately
|
|
298
|
+
// we avoid the memmove in the easy / usual case
|
|
299
|
+
memmove(s + 1, s, scanner->quote_close_position);
|
|
300
|
+
s += 2;
|
|
301
|
+
n -= 2;
|
|
302
|
+
if (UNLIKELY((scanner->quoted & ZSV_PARSER_QUOTE_EMBEDDED) != 0)) {
|
|
303
|
+
// remove dbl-quotes
|
|
304
|
+
for (size_t i = 0; i + 1 < n; i++) {
|
|
305
|
+
if (s[i] == '"' && s[i + 1] == '"') {
|
|
306
|
+
if (n > i + 2)
|
|
307
|
+
memmove(s + i + 1, s + i + 2, n - i - 2);
|
|
308
|
+
n--;
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
} else if (UNLIKELY(scanner->opts.delimiter != ',')) {
|
|
315
|
+
if (memchr(s, ',', n))
|
|
316
|
+
scanner->quoted = ZSV_PARSER_QUOTE_NEEDED;
|
|
317
|
+
}
|
|
318
|
+
// end quote handling
|
|
319
|
+
|
|
320
|
+
if (scanner->opts.malformed_utf8_replace) {
|
|
321
|
+
if (scanner->opts.malformed_utf8_replace < 0)
|
|
322
|
+
n = zsv_strencode(s, n, 0, NULL, NULL);
|
|
323
|
+
else
|
|
324
|
+
n = zsv_strencode(s, n, scanner->opts.malformed_utf8_replace, NULL, NULL);
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
if (UNLIKELY(scanner->opts.cell_handler != NULL))
|
|
328
|
+
scanner->opts.cell_handler(scanner->opts.ctx, s, n);
|
|
329
|
+
if (VERY_LIKELY(scanner->row.used < scanner->row.allocated)) {
|
|
330
|
+
struct zsv_row *row = &scanner->row;
|
|
331
|
+
struct zsv_cell c = {s, n, scanner->opts.no_quotes ? 1 : scanner->quoted, 0};
|
|
332
|
+
row->cells[row->used++] = c;
|
|
333
|
+
} else
|
|
334
|
+
scanner->row.overflow++;
|
|
335
|
+
scanner->have_cell = 1;
|
|
336
|
+
|
|
337
|
+
zsv_clear_cell(scanner);
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
__attribute__((always_inline)) static inline enum zsv_status row_dl(struct zsv_scanner *scanner) {
|
|
341
|
+
if (VERY_UNLIKELY(scanner->row.overflow)) {
|
|
342
|
+
scanner->errprintf(scanner->errf, "Warning: number of columns (%zu) exceeds row max (%zu)\n",
|
|
343
|
+
scanner->row.allocated + scanner->row.overflow, scanner->row.allocated);
|
|
344
|
+
scanner->row.overflow = 0;
|
|
345
|
+
}
|
|
346
|
+
if (VERY_LIKELY(scanner->opts.row_handler != NULL)) // TO DO: disallow row_handler to be null; if null, set to dummy
|
|
347
|
+
scanner->opts.row_handler(scanner->opts.ctx);
|
|
348
|
+
// Note: scanner->data_row_count will be incremented AFTER this call
|
|
349
|
+
// in order to accommodate pull parsing, in which case incrementing here
|
|
350
|
+
// would be too early
|
|
351
|
+
#ifdef ZSV_EXTRAS
|
|
352
|
+
scanner->progress.cum_row_count++;
|
|
353
|
+
if (VERY_UNLIKELY(scanner->opts.progress.rows_interval &&
|
|
354
|
+
scanner->progress.cum_row_count % scanner->opts.progress.rows_interval == 0)) {
|
|
355
|
+
char ok;
|
|
356
|
+
if (!scanner->opts.progress.seconds_interval)
|
|
357
|
+
ok = 1;
|
|
358
|
+
else {
|
|
359
|
+
// using timer_create() would be better, but is not currently supported on
|
|
360
|
+
// all platforms, so the fallback is to poll
|
|
361
|
+
time_t now = time(NULL);
|
|
362
|
+
if (now > scanner->progress.last_time &&
|
|
363
|
+
(unsigned int)(now - scanner->progress.last_time) >= scanner->opts.progress.seconds_interval) {
|
|
364
|
+
ok = 1;
|
|
365
|
+
scanner->progress.last_time = now;
|
|
366
|
+
} else
|
|
367
|
+
ok = 0;
|
|
368
|
+
}
|
|
369
|
+
if (ok && scanner->opts.progress.callback)
|
|
370
|
+
scanner->abort = scanner->opts.progress.callback(scanner->opts.progress.ctx, scanner->progress.cum_row_count);
|
|
371
|
+
#ifndef NDEBUG
|
|
372
|
+
if (scanner->abort)
|
|
373
|
+
scanner->errprintf(scanner->errf, "ZSV parsing aborted at %zu\n", scanner->progress.cum_row_count);
|
|
374
|
+
#endif
|
|
375
|
+
}
|
|
376
|
+
if (VERY_UNLIKELY(scanner->progress.max_rows > 0)) {
|
|
377
|
+
if (VERY_UNLIKELY(scanner->progress.cum_row_count == scanner->progress.max_rows)) {
|
|
378
|
+
scanner->abort = 1;
|
|
379
|
+
scanner->row.used = 0;
|
|
380
|
+
return zsv_status_max_rows_read;
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
#endif
|
|
385
|
+
if (VERY_UNLIKELY(scanner->abort))
|
|
386
|
+
return zsv_status_cancelled;
|
|
387
|
+
scanner->have_cell = 0;
|
|
388
|
+
scanner->row.used = 0;
|
|
389
|
+
return zsv_status_ok;
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
__attribute__((always_inline)) static inline enum zsv_status cell_and_row_dl(struct zsv_scanner *scanner,
|
|
393
|
+
unsigned char *s, size_t n) {
|
|
394
|
+
cell_dl(scanner, s, n);
|
|
395
|
+
return row_dl(scanner);
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
#ifndef movemask_pseudo
|
|
399
|
+
/*
|
|
400
|
+
provide our own pseudo-movemask, which sets the 1 bit for each corresponding
|
|
401
|
+
non-zero value in the vector (as opposed to real movemask which sets the bit
|
|
402
|
+
only for each corresponding non-zero highest-bit value in the vector)
|
|
403
|
+
*/
|
|
404
|
+
|
|
405
|
+
#if defined(__EMSCRIPTEN__) && defined(__SSE2__)
|
|
406
|
+
#include <wasm_simd128.h>
|
|
407
|
+
#define movemask_pseudo(x) wasm_i8x16_bitmask(x)
|
|
408
|
+
|
|
409
|
+
#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
|
|
410
|
+
#include <arm_neon.h>
|
|
411
|
+
/*
 * NEON replacement for movemask: builds a 16-bit mask with one bit per lane
 * of `v`. A lane contributes its bit only when that bit position is set in
 * the lane's byte (as it is for the all-ones lanes produced by a vector
 * comparison).
 */
static inline zsv_mask_t movemask_pseudo(zsv_uc_vector v) {
  // see https://stackoverflow.com/questions/11870910/
  static const uint8_t
    __attribute__((aligned(16))) lane_bits[16] = {1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128};
  uint8x16_t bit_per_lane = vld1q_u8(lane_bits);

  // keep each lane's own bit, then pairwise-add until each 64-bit half holds
  // the combined bits of its eight lanes
  uint8x16_t selected = vandq_u8(v, bit_per_lane);
  uint16x8_t sum16 = vpaddlq_u8(selected);
  uint32x4_t sum32 = vpaddlq_u16(sum16);
  uint64x2_t sum64 = vpaddlq_u32(sum32);

  // byte 0 of each 64-bit half becomes one byte of the result
  uint8x16_t halves = (uint8x16_t)sum64;
  uint16_t result;
  uint8_t *out = (uint8_t *)&result;
  vst1q_lane_u8(out + 0, halves, 0);
  vst1q_lane_u8(out + 1, halves, 8);
  return result;
}
|
|
426
|
+
|
|
427
|
+
#elif defined(__SSE2__)
|
|
428
|
+
|
|
429
|
+
typedef char zsv_c_vector __attribute__((vector_size(VECTOR_BYTES)));
|
|
430
|
+
#define movemask_pseudo(x) __builtin_ia32_pmovmskb128((zsv_c_vector)x)
|
|
431
|
+
|
|
432
|
+
#else
|
|
433
|
+
|
|
434
|
+
// slow path
|
|
435
|
+
#if defined(__EMSCRIPTEN__)
|
|
436
|
+
#warning \
|
|
437
|
+
"Compiling with emscripten, without using SIMD. To use SIMD, compile with -msse2 -msimd128 -experimental-wasm-simd and -I/path/to/emsdk/upstream/lib/clang/16.0.0/include"
|
|
438
|
+
#endif
|
|
439
|
+
|
|
440
|
+
/*
 * Portable fallback: walk the vector one element at a time and set bit i of
 * the result whenever element i of `v` is non-zero.
 */
static inline zsv_mask_t movemask_pseudo(zsv_uc_vector v) {
  zsv_mask_t result = 0;
  zsv_mask_t lane_bit = 1;
  size_t lane = 0;

  while (lane < sizeof(zsv_uc_vector)) {
    if (v[lane])
      result |= lane_bit;
    lane_bit <<= 1;
    lane++;
  }

  return result;
}
|
|
449
|
+
|
|
450
|
+
#endif // __EMSCRIPTEN__
|
|
451
|
+
#endif // ndef movemask_pseudo
|
|
452
|
+
|
|
453
|
+
#include "vector_delim.c"
|
|
454
|
+
|
|
455
|
+
#ifdef ZSV_SUPPORT_PULL_PARSER
|
|
456
|
+
#undef ZSV_SUPPORT_PULL_PARSER
|
|
457
|
+
#endif
|
|
458
|
+
#define ZSV_SCAN_DELIM zsv_scan_delim
|
|
459
|
+
#include "zsv_scan_delim.c"
|
|
460
|
+
#undef ZSV_SCAN_DELIM
|
|
461
|
+
#undef scanner_last
|
|
462
|
+
|
|
463
|
+
#define ZSV_SUPPORT_PULL_PARSER 1
|
|
464
|
+
#define ZSV_SCAN_DELIM zsv_scan_delim_pull
|
|
465
|
+
#include "zsv_scan_delim.c"
|
|
466
|
+
|
|
467
|
+
#include "zsv_scan_fixed.c"
|
|
468
|
+
|
|
469
|
+
static enum zsv_status zsv_scan(struct zsv_scanner *scanner, unsigned char *buff, size_t bytes_read) {
|
|
470
|
+
switch (scanner->mode) {
|
|
471
|
+
case ZSV_MODE_FIXED:
|
|
472
|
+
return zsv_scan_fixed(scanner, buff, bytes_read);
|
|
473
|
+
case ZSV_MODE_DELIM_PULL:
|
|
474
|
+
// return zsv_status_row or zsv_status_ok (next call to parse_more)
|
|
475
|
+
return zsv_scan_delim_pull(scanner, buff, bytes_read);
|
|
476
|
+
default:
|
|
477
|
+
return zsv_scan_delim(scanner, buff, bytes_read);
|
|
478
|
+
}
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
#define ZSV_BOM "\xef\xbb\xbf"
|
|
482
|
+
|
|
483
|
+
// optional: set a filter function to filter data before it is processed
|
|
484
|
+
// function should return the number of bytes to process. this may be smaller
|
|
485
|
+
// but may not be larger than the original number of bytes, and any data modification
|
|
486
|
+
// must be done in-place to *buff
|
|
487
|
+
enum zsv_status zsv_set_scan_filter(struct zsv_scanner *scanner,
|
|
488
|
+
size_t (*filter)(void *ctx, unsigned char *buff, size_t bytes_read), void *ctx) {
|
|
489
|
+
scanner->filter = filter;
|
|
490
|
+
scanner->filter_ctx = ctx;
|
|
491
|
+
return zsv_status_ok;
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
static void apply_callbacks(struct zsv_scanner *scanner) {
|
|
495
|
+
if (UNLIKELY(scanner->opts.cell_handler != NULL)) {
|
|
496
|
+
// call the user-provided cell() callback on each cell
|
|
497
|
+
unsigned char saved_quoted = scanner->quoted;
|
|
498
|
+
for (size_t i = 0, j = zsv_cell_count(scanner); i < j; i++) {
|
|
499
|
+
struct zsv_cell c = zsv_get_cell_1(scanner, i);
|
|
500
|
+
scanner->quoted = c.quoted;
|
|
501
|
+
scanner->opts.cell_handler(scanner->opts.ctx, c.str, c.len);
|
|
502
|
+
}
|
|
503
|
+
scanner->quoted = saved_quoted;
|
|
504
|
+
}
|
|
505
|
+
// call the user-provided row() callback
|
|
506
|
+
if (VERY_LIKELY(scanner->opts.row_handler != NULL))
|
|
507
|
+
scanner->opts.row_handler(scanner->opts.ctx);
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
static void set_callbacks(struct zsv_scanner *scanner);
|
|
511
|
+
|
|
512
|
+
/*
 * Return 1 when every used cell of the current row has length zero (this
 * includes a row with no used cells), otherwise 0.
 */
static char zsv_internal_row_is_blank(zsv_parser parser) {
  unsigned int ix = 0;

  while (ix < parser->row.used) {
    if (parser->row.cells[ix].len != 0)
      return 0;
    ix++;
  }
  return 1;
}
|
|
518
|
+
|
|
519
|
+
static void skip_to_first_row_w_data(void *ctx) {
|
|
520
|
+
struct zsv_scanner *scanner = ctx;
|
|
521
|
+
if (LIKELY(zsv_internal_row_is_blank(scanner) == 0)) {
|
|
522
|
+
scanner->opts.keep_empty_header_rows = 1;
|
|
523
|
+
if (scanner->empty_header_rows) {
|
|
524
|
+
scanner->errprintf(scanner->errf, "Warning: skipped %zu empty header rows; suggest using:\n --skip-head %zu\n",
|
|
525
|
+
scanner->empty_header_rows, scanner->empty_header_rows + scanner->opts_orig.rows_to_ignore);
|
|
526
|
+
}
|
|
527
|
+
set_callbacks(scanner);
|
|
528
|
+
apply_callbacks(scanner);
|
|
529
|
+
} else // entire row was empty
|
|
530
|
+
scanner->empty_header_rows++;
|
|
531
|
+
}
|
|
532
|
+
|
|
533
|
+
static void ignore_header_rows(void *ctx) {
|
|
534
|
+
struct zsv_scanner *scanner = ctx;
|
|
535
|
+
if (scanner->opts.rows_to_ignore)
|
|
536
|
+
scanner->opts.rows_to_ignore--;
|
|
537
|
+
if (!scanner->opts.rows_to_ignore)
|
|
538
|
+
set_callbacks(scanner);
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
static void collate_header_row(void *ctx) {
|
|
542
|
+
struct zsv_scanner *scanner = ctx;
|
|
543
|
+
if (scanner->opts.header_span) {
|
|
544
|
+
--scanner->opts.header_span;
|
|
545
|
+
|
|
546
|
+
// save this row
|
|
547
|
+
|
|
548
|
+
// first, make sure this row has at least as many cells as the largest prior row
|
|
549
|
+
if (scanner->collate_header) {
|
|
550
|
+
for (size_t i = zsv_cell_count(scanner); i < scanner->row.allocated && i < scanner->collate_header->column_count;
|
|
551
|
+
i++)
|
|
552
|
+
memset(&scanner->row.cells[i], 0, sizeof(scanner->row.cells[i]));
|
|
553
|
+
scanner->row.used = scanner->collate_header->column_count;
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
if (collate_header_append(scanner, &scanner->collate_header))
|
|
557
|
+
scanner->abort = 1;
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
if (!scanner->opts.header_span) {
|
|
561
|
+
// finished with header; combine all rows into a single row
|
|
562
|
+
set_callbacks(scanner);
|
|
563
|
+
if (scanner->collate_header) {
|
|
564
|
+
size_t offset = 0;
|
|
565
|
+
for (size_t i = 0; i < scanner->collate_header->column_count; i++) {
|
|
566
|
+
size_t len_plus1 = scanner->collate_header->lengths[i];
|
|
567
|
+
scanner->row.cells[i].str = scanner->collate_header->buff.buff + offset;
|
|
568
|
+
if (len_plus1) {
|
|
569
|
+
scanner->row.cells[i].len = len_plus1 - 1;
|
|
570
|
+
scanner->row.cells[i].quoted = 1;
|
|
571
|
+
} else
|
|
572
|
+
scanner->row.cells[i].len = 0;
|
|
573
|
+
offset += len_plus1;
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
apply_callbacks(scanner);
|
|
578
|
+
if (scanner->mode != ZSV_MODE_DELIM_PULL)
|
|
579
|
+
collate_header_destroy(&scanner->collate_header);
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
static void set_callbacks(struct zsv_scanner *scanner) {
|
|
584
|
+
if (scanner->opts.rows_to_ignore) {
|
|
585
|
+
scanner->opts.row_handler = ignore_header_rows;
|
|
586
|
+
scanner->opts.cell_handler = NULL;
|
|
587
|
+
scanner->opts.ctx = scanner;
|
|
588
|
+
} else if (scanner->mode != ZSV_MODE_FIXED && !scanner->opts.keep_empty_header_rows) {
|
|
589
|
+
scanner->opts.row_handler = skip_to_first_row_w_data;
|
|
590
|
+
scanner->opts.cell_handler = NULL;
|
|
591
|
+
scanner->opts.ctx = scanner;
|
|
592
|
+
} else if (scanner->opts.header_span > 1) {
|
|
593
|
+
scanner->opts.row_handler = collate_header_row;
|
|
594
|
+
scanner->opts.cell_handler = NULL;
|
|
595
|
+
scanner->opts.ctx = scanner;
|
|
596
|
+
} else {
|
|
597
|
+
#ifdef ZSV_EXTRAS
|
|
598
|
+
if (scanner->overwrite.odata.have)
|
|
599
|
+
scanner->get_cell = zsv_get_cell_with_overwrite;
|
|
600
|
+
else
|
|
601
|
+
#endif
|
|
602
|
+
scanner->get_cell = zsv_get_cell_1;
|
|
603
|
+
scanner->data_row_count = 0;
|
|
604
|
+
scanner->opts.row_handler = scanner->opts_orig.row_handler;
|
|
605
|
+
scanner->opts.cell_handler = scanner->opts_orig.cell_handler;
|
|
606
|
+
scanner->opts.ctx = scanner->opts_orig.ctx;
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
static void zsv_throwaway_row(void *ctx) {
|
|
611
|
+
struct zsv_scanner *scanner = ctx;
|
|
612
|
+
if (scanner->opts.overflow_row_handler != NULL) {
|
|
613
|
+
if (zsv_cell_count(scanner) > 1 || zsv_get_cell_1(scanner, 0).len > 0)
|
|
614
|
+
scanner->opts.overflow_row_handler(ctx);
|
|
615
|
+
}
|
|
616
|
+
scanner->buffer_exceeded = 0;
|
|
617
|
+
set_callbacks(ctx);
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
#ifdef ZSV_EXTRAS
|
|
621
|
+
|
|
622
|
+
/*
 * Advance the pending overwrite until it is no longer before
 * (row_ix, col_ix), then report whether it targets exactly that cell.
 *
 * When no overwrite data remains, the parser's cell getter is switched to
 * zsv_get_cell_1 and 0 is returned. Otherwise returns 1 if the pending
 * overwrite's row and column both match, else 0.
 */
static int zsv_have_overwrite(zsv_parser parser, size_t row_ix, size_t col_ix) {
  struct zsv_overwrite *ow = &parser->overwrite;

  // skip overwrites that belong to earlier rows
  while (ow->odata.have && ow->odata.row_ix < row_ix)
    ow->next(ow->ctx, &ow->odata);

  // within the requested row, skip overwrites for earlier columns
  while (ow->odata.have && ow->odata.row_ix == row_ix && ow->odata.col_ix < col_ix)
    ow->next(ow->ctx, &ow->odata);

  if (!ow->odata.have) {
    parser->get_cell = zsv_get_cell_1;
    return 0;
  }
  return ow->odata.row_ix == row_ix && ow->odata.col_ix == col_ix;
}
|
|
632
|
+
|
|
633
|
+
/*
 * Cell getter used while overwrite data is available.
 *
 * Returns a zeroed cell when col_ix is outside the used part of the row.
 * Otherwise, if an overwrite targets (data_row_count, col_ix), returns the
 * overwrite value with `overwritten` set; if not, returns the row's own cell.
 */
static struct zsv_cell zsv_get_cell_with_overwrite(zsv_parser parser, size_t col_ix) {
  if (VERY_UNLIKELY(col_ix >= parser->row.used)) {
    struct zsv_cell empty = {0, 0, 0, 0};
    return empty;
  }

  if (zsv_have_overwrite(parser, parser->data_row_count, col_ix)) {
    struct zsv_cell replaced = parser->overwrite.odata.val;
    replaced.overwritten = 1;
    return replaced;
  }

  return parser->row.cells[col_ix];
}
|
|
646
|
+
#endif
|
|
647
|
+
|
|
648
|
+
static int zsv_scanner_init(struct zsv_scanner *scanner, struct zsv_opts *opts) {
|
|
649
|
+
size_t need_buff_size = 0;
|
|
650
|
+
scanner->errprintf = opts->errprintf ? opts->errprintf : zsv_generic_fprintf;
|
|
651
|
+
scanner->errf = opts->errf ? opts->errf : stderr;
|
|
652
|
+
scanner->errclose = opts->errclose;
|
|
653
|
+
if (opts->malformed_utf8_replace == ZSV_MALFORMED_UTF8_DO_NOT_REPLACE)
|
|
654
|
+
opts->malformed_utf8_replace = 0;
|
|
655
|
+
if (opts->buffsize < opts->max_row_size * 2)
|
|
656
|
+
need_buff_size = opts->max_row_size * 2;
|
|
657
|
+
opts->delimiter = opts->delimiter ? opts->delimiter : ',';
|
|
658
|
+
if (opts->delimiter == '\n' || opts->delimiter == '\r' || opts->delimiter == '"') {
|
|
659
|
+
scanner->errprintf(scanner->errf, "warning: ignoring illegal delimiter\n");
|
|
660
|
+
opts->delimiter = ',';
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
if (opts->insert_header_row)
|
|
664
|
+
scanner->insert_string = opts->insert_header_row;
|
|
665
|
+
|
|
666
|
+
if (need_buff_size < ZSV_MIN_SCANNER_BUFFSIZE)
|
|
667
|
+
need_buff_size = ZSV_MIN_SCANNER_BUFFSIZE;
|
|
668
|
+
if (opts->buffsize < need_buff_size) {
|
|
669
|
+
if (opts->buffsize > 0) {
|
|
670
|
+
if (need_buff_size == ZSV_MIN_SCANNER_BUFFSIZE)
|
|
671
|
+
scanner->errprintf(scanner->errf, "Increasing --buff-size to minimum %zu\n", need_buff_size);
|
|
672
|
+
else
|
|
673
|
+
scanner->errprintf(scanner->errf, "Increasing --buff-size to %zu to accommmodate max-row-size of %u\n",
|
|
674
|
+
need_buff_size, opts->max_row_size);
|
|
675
|
+
}
|
|
676
|
+
opts->buffsize = need_buff_size;
|
|
677
|
+
}
|
|
678
|
+
if (opts->buffsize == 0)
|
|
679
|
+
opts->buffsize = ZSV_DEFAULT_SCANNER_BUFFSIZE;
|
|
680
|
+
else if (opts->buffsize < ZSV_MIN_SCANNER_BUFFSIZE)
|
|
681
|
+
opts->buffsize = ZSV_MIN_SCANNER_BUFFSIZE;
|
|
682
|
+
|
|
683
|
+
scanner->in = opts->stream;
|
|
684
|
+
if (!opts->read) {
|
|
685
|
+
scanner->read = (zsv_generic_read)fread;
|
|
686
|
+
if (!opts->stream)
|
|
687
|
+
scanner->in = stdin;
|
|
688
|
+
} else {
|
|
689
|
+
scanner->read = opts->read;
|
|
690
|
+
scanner->in = opts->stream;
|
|
691
|
+
}
|
|
692
|
+
scanner->buff.buff = opts->buff;
|
|
693
|
+
scanner->buff.size = opts->buffsize;
|
|
694
|
+
|
|
695
|
+
if (opts->buffsize && !opts->buff) {
|
|
696
|
+
scanner->buff.buff = malloc(opts->buffsize);
|
|
697
|
+
scanner->free_buff = 1;
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
#ifdef ZSV_EXTRAS
|
|
701
|
+
if (opts->max_rows)
|
|
702
|
+
scanner->progress.max_rows = opts->max_rows;
|
|
703
|
+
#endif
|
|
704
|
+
if (scanner->buff.buff) {
|
|
705
|
+
scanner->opts = *opts;
|
|
706
|
+
scanner->opts_orig = *opts;
|
|
707
|
+
if (!scanner->opts.max_columns)
|
|
708
|
+
scanner->opts.max_columns = 1024;
|
|
709
|
+
set_callbacks(scanner);
|
|
710
|
+
if ((scanner->row.allocated = scanner->opts.max_columns) &&
|
|
711
|
+
(scanner->row.cells = calloc(scanner->row.allocated, sizeof(*scanner->row.cells)))) {
|
|
712
|
+
#ifdef ZSV_EXTRAS
|
|
713
|
+
// initialize overwrites
|
|
714
|
+
if (scanner->opts.overwrite.open && !scanner->opts.overwrite.cancel) {
|
|
715
|
+
if (scanner->opts.overwrite.open(scanner->opts.overwrite.ctx) == zsv_status_ok) {
|
|
716
|
+
scanner->overwrite.odata.have = 1;
|
|
717
|
+
scanner->overwrite.next = scanner->opts.overwrite.next;
|
|
718
|
+
scanner->overwrite.close = scanner->opts.overwrite.close;
|
|
719
|
+
scanner->overwrite.ctx = scanner->opts.overwrite.ctx;
|
|
720
|
+
// load the first overwrite
|
|
721
|
+
scanner->overwrite.next(scanner->overwrite.ctx, &scanner->overwrite.odata);
|
|
722
|
+
return 0;
|
|
723
|
+
}
|
|
724
|
+
return 1;
|
|
725
|
+
}
|
|
726
|
+
#endif
|
|
727
|
+
return 0;
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
return 1;
|
|
731
|
+
}
|