zsv 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +36 -0
- data/LICENSE +21 -0
- data/README.md +311 -0
- data/ext/zsv/common.h +34 -0
- data/ext/zsv/extconf.rb +137 -0
- data/ext/zsv/options.c +126 -0
- data/ext/zsv/options.h +31 -0
- data/ext/zsv/options_internal.h +8 -0
- data/ext/zsv/parser.c +300 -0
- data/ext/zsv/parser.h +62 -0
- data/ext/zsv/row.c +122 -0
- data/ext/zsv/row.h +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
- data/ext/zsv/zsv_ext.c +343 -0
- data/lib/zsv/version.rb +5 -0
- data/lib/zsv.rb +81 -0
- metadata +340 -0
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Tai Chi Minh Ralph Eastwood, Matt Wong and Guarnerix dba Liquidaty
|
|
3
|
+
* All rights reserved
|
|
4
|
+
*
|
|
5
|
+
* This file is part of zsv/lib, distributed under the license defined at
|
|
6
|
+
* https://opensource.org/licenses/MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#ifndef ZSV_API_H
|
|
10
|
+
#define ZSV_API_H
|
|
11
|
+
|
|
12
|
+
#define ZSV_ROW_MAX_SIZE_DEFAULT 65536
|
|
13
|
+
#define ZSV_ROW_MAX_SIZE_DEFAULT_S "64k"
|
|
14
|
+
|
|
15
|
+
#define ZSV_MAX_COLS_DEFAULT 1024
|
|
16
|
+
|
|
17
|
+
#define ZSV_ROW_MAX_SIZE_MIN 1024
|
|
18
|
+
#define ZSV_ROW_MAX_SIZE_MIN_S "1024"
|
|
19
|
+
|
|
20
|
+
#define ZSV_MIN_SCANNER_BUFFSIZE 4096
|
|
21
|
+
#define ZSV_DEFAULT_SCANNER_BUFFSIZE (1 << 18) // 256k
|
|
22
|
+
|
|
23
|
+
#include "zsv_export.h"
|
|
24
|
+
/*****************************************************************************
|
|
25
|
+
* libzsv API
|
|
26
|
+
*
|
|
27
|
+
* Functions provided by the zsv library are described herein. This document is
|
|
28
|
+
* organized into three sections:
|
|
29
|
+
* - required functions. any time libzsv is used and any input at all is parsed,
|
|
30
|
+
* each of these functions should be used (usually, exactly once)
|
|
31
|
+
* - minimal access functions. these functions are generally necessary to use
|
|
32
|
+
* libzsv for any non-trivial task
|
|
33
|
+
* - other functions
|
|
34
|
+
******************************************************************************/
|
|
35
|
+
|
|
36
|
+
/******************************************************************************
|
|
37
|
+
* Required functions:
|
|
38
|
+
* - zsv_new(): allocate a parser
|
|
39
|
+
* - zsv_parse_more(): parse some data
|
|
40
|
+
* - zsv_finish(): tie up loose ends
|
|
41
|
+
* - zsv_delete(): dispose the parser
|
|
42
|
+
******************************************************************************/
|
|
43
|
+
|
|
44
|
+
ZSV_EXPORT
|
|
45
|
+
int zsv_peek(zsv_parser);
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Create a zsv parser. Typically, passed options will at least include a
|
|
49
|
+
`row_handler()` callback. Many, but not all, options can be subsequently
|
|
50
|
+
* set or modified after calling `zsv_new()`
|
|
51
|
+
*
|
|
52
|
+
* @param opts see `struct zsv_opts` in common.h
|
|
53
|
+
* @returns zsv parser handle
|
|
54
|
+
*/
|
|
55
|
+
ZSV_EXPORT
|
|
56
|
+
zsv_parser zsv_new(struct zsv_opts *opts);
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Parse the next chunk of data from the input stream:
|
|
60
|
+
* - Immediately after a cell (column) delimiter is parsed, the configured
|
|
61
|
+
* `cell_handler()` callback, if any, is called
|
|
62
|
+
* - Immediately after a row delimiter is parsed, the configured
|
|
63
|
+
* `row_handler()` callback, if any, is called.
|
|
64
|
+
*
|
|
65
|
+
* @param parser
|
|
66
|
+
* @returns zsv_status_ok if more data remains to be parsed,
|
|
67
|
+
* zsv_status_no_more_input if the stream's EOF has been reached,
|
|
68
|
+
* or other zsv status code in the event of error or cancellation
|
|
69
|
+
*/
|
|
70
|
+
ZSV_EXPORT enum zsv_status zsv_parse_more(zsv_parser parser);
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* Finish any remaining processing, after all input has been read
|
|
74
|
+
*/
|
|
75
|
+
ZSV_EXPORT enum zsv_status zsv_finish(zsv_parser);
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Dispose of a parser that was created with `zsv_new()` or `zsv_new_with_properties()`
|
|
79
|
+
*/
|
|
80
|
+
ZSV_EXPORT enum zsv_status zsv_delete(zsv_parser);
|
|
81
|
+
|
|
82
|
+
/******************************************************************************
|
|
83
|
+
* minimal access functions:
|
|
84
|
+
* - zsv_cell_count(): get the number of cells in the row
|
|
85
|
+
* - zsv_get_cell(): retrieve a cell value
|
|
86
|
+
******************************************************************************/
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* Get the number of cells in the row that was just parsed. This function
|
|
90
|
+
* is typically called from within your `row_handler()` callback. In the event
|
|
91
|
+
* that the last row did not contain a single cell delimiter, returns 1
|
|
92
|
+
*
|
|
93
|
+
* @param parser
|
|
94
|
+
* @returns number, >= 1, of cells in the row that was just parsed
|
|
95
|
+
*/
|
|
96
|
+
ZSV_EXPORT
|
|
97
|
+
size_t zsv_cell_count(zsv_parser parser);
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* Get the contents of a cell in the row that was just parsed. See `struct zsv_cell`
|
|
101
|
+
* in common.h for further details
|
|
102
|
+
*
|
|
103
|
+
* @param parser
|
|
104
|
+
* @param index zero-based index of the cell to fetch
|
|
105
|
+
* @return `zsv_cell` structure with the bytes and length of this cell value
|
|
106
|
+
*
|
|
107
|
+
* An example of a `row_handler()` loop to print each cell in a row might be:
|
|
108
|
+
* ```
|
|
109
|
+
* size_t cell_count = zsv_cell_count(parser);
|
|
110
|
+
* for(size_t i = 0; i < cell_count; i++) {
|
|
111
|
+
* struct zsv_cell c = zsv_get_cell(parser, i);
|
|
112
|
+
printf("%.*s", (int)c.len, (const char *)c.str);
|
|
113
|
+
* }
|
|
114
|
+
* ```
|
|
115
|
+
*/
|
|
116
|
+
struct zsv_cell zsv_get_cell(zsv_parser parser, size_t index);
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* `zsv_get_cell_len()` is not needed in most cases, but may be useful in
|
|
120
|
+
* restrictive cases such as when calling from Javascript into wasm
|
|
121
|
+
*
|
|
122
|
+
* @param parser
|
|
123
|
+
* @param ix 0-based index of the cell to copy
|
|
124
|
+
* @return length of cell contents (0 if cell is empty)
|
|
125
|
+
*/
|
|
126
|
+
ZSV_EXPORT
|
|
127
|
+
size_t zsv_get_cell_len(zsv_parser parser, size_t ix);
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* get a pointer to the cell contents (not NULL-terminated)
|
|
131
|
+
* @param parser
|
|
132
|
+
* @param ix 0-based index of the cell to copy. Caller must ensure validity
|
|
133
|
+
* @return pointer to the cell contents, or NULL if contents are empty
|
|
134
|
+
*/
|
|
135
|
+
ZSV_EXPORT
|
|
136
|
+
unsigned char *zsv_get_cell_str(zsv_parser parser, size_t ix);
|
|
137
|
+
|
|
138
|
+
/******************************************************************************
|
|
139
|
+
* other functions
|
|
140
|
+
******************************************************************************/
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Get the library version
|
|
144
|
+
*/
|
|
145
|
+
ZSV_EXPORT
|
|
146
|
+
const char *zsv_lib_version(void);
|
|
147
|
+
|
|
148
|
+
/**
|
|
149
|
+
* Change a parser's row handler. This function may be called at any time
|
|
150
|
+
* during the parsing process to change the row handler that is called each
|
|
151
|
+
* time a row is parsed
|
|
152
|
+
*
|
|
153
|
+
* @param parser
|
|
154
|
+
* @param row_handler new callback value
|
|
155
|
+
*/
|
|
156
|
+
ZSV_EXPORT void zsv_set_row_handler(zsv_parser, void (*row_handler)(void *ctx));
|
|
157
|
+
|
|
158
|
+
/**
|
|
159
|
+
* Check if the row we just parsed consisted entirely of blank data
|
|
160
|
+
*
|
|
161
|
+
* @param parser
|
|
162
|
+
* @return non-zero if blank, 0 if non-blank
|
|
163
|
+
*/
|
|
164
|
+
ZSV_EXPORT
|
|
165
|
+
char zsv_row_is_blank(zsv_parser parser);
|
|
166
|
+
|
|
167
|
+
/**
|
|
168
|
+
* Set the context pointer that is passed to our callbacks
|
|
169
|
+
* @param parser
|
|
170
|
+
* @param ctx new context pointer value
|
|
171
|
+
*/
|
|
172
|
+
ZSV_EXPORT
|
|
173
|
+
void zsv_set_context(zsv_parser parser, void *ctx);
|
|
174
|
+
|
|
175
|
+
/**
|
|
176
|
+
* Set the read function that is invoked by `zsv_parse_more()` to fetch more data.
|
|
177
|
+
* If not explicitly set, defaults to fread
|
|
178
|
+
*
|
|
179
|
+
* @param parser
|
|
180
|
+
* @param read_func function called to fetch more input data
|
|
181
|
+
* @param stream value that is passed to read_function when it is called
|
|
182
|
+
*/
|
|
183
|
+
ZSV_EXPORT
|
|
184
|
+
void zsv_set_read(zsv_parser parser, size_t (*read_func)(void *restrict, size_t n, size_t size, void *restrict));
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Set the input stream our parser reads from. If not explicitly set, defaults to
|
|
188
|
+
* stdin. This can be used to read multiple inputs as a single combined input
|
|
189
|
+
* by calling `zsv_set_input()` after `zsv_parse_more()` returns
|
|
190
|
+
* `zsv_status_no_more_input`
|
|
191
|
+
*/
|
|
192
|
+
ZSV_EXPORT
|
|
193
|
+
void zsv_set_input(zsv_parser, void *in);
|
|
194
|
+
|
|
195
|
+
/**
|
|
196
|
+
* Insert a filter to process or modify, before parsing, the next chunk of raw
|
|
197
|
+
* bytes read from the input stream. For example, to save a copy of the raw
|
|
198
|
+
* input to a file, `zsv_set_scan_filter()` could be called with
|
|
199
|
+
* `zsv_filter_write` passed as the filter argument, and the target FILE *
|
|
200
|
+
* passed as the context pointer.
|
|
201
|
+
*
|
|
202
|
+
* @param parser
|
|
203
|
+
* @param filter callback that is called on each chunk that is read from the
|
|
204
|
+
* input stream, before the chunk is parsed. The callback may
|
|
205
|
+
* modify the contents of the buffer so long as its return value
|
|
206
|
+
* does not exceed the bufflen it was passed
|
|
207
|
+
*/
|
|
208
|
+
ZSV_EXPORT enum zsv_status zsv_set_scan_filter(zsv_parser parser,
|
|
209
|
+
size_t (*filter)(void *ctx, unsigned char *buff, size_t bufflen),
|
|
210
|
+
void *ctx);
|
|
211
|
+
|
|
212
|
+
/**
|
|
213
|
+
* Set parsing mode to fixed-width. Once set to fixed mode, a parser may not be
|
|
214
|
+
* set back to CSV mode
|
|
215
|
+
* @return status code
|
|
216
|
+
* @param parser parser handle
|
|
217
|
+
* @param count number of elements in offsets
|
|
218
|
+
* @param offsets array of cell-end offsets. offsets[0] should be the length of the first cell
|
|
219
|
+
*/
|
|
220
|
+
ZSV_EXPORT enum zsv_status zsv_set_fixed_offsets(zsv_parser parser, size_t count, size_t *offsets);
|
|
221
|
+
|
|
222
|
+
/**
|
|
223
|
+
* Parse a buffer of bytes. This function is usually not needed, but
|
|
224
|
+
* can be used to parse in a push instead of pull manner
|
|
225
|
+
*
|
|
226
|
+
* @param parser
|
|
227
|
+
* @param buff the input buffer to parse. This buffer may not overlap with
|
|
228
|
+
* the parser buffer!
|
|
229
|
+
* @param len length of the input to parse
|
|
230
|
+
*/
|
|
231
|
+
ZSV_EXPORT enum zsv_status zsv_parse_bytes(zsv_parser parser, const unsigned char *restrict buff, size_t len);
|
|
232
|
+
|
|
233
|
+
/**
|
|
234
|
+
* Get a text description of a status code
|
|
235
|
+
*/
|
|
236
|
+
ZSV_EXPORT
|
|
237
|
+
const unsigned char *zsv_parse_status_desc(enum zsv_status status);
|
|
238
|
+
|
|
239
|
+
/**
|
|
240
|
+
* Abort parsing. After this function is called, no further
|
|
241
|
+
* `row_handler()` or `cell_handler()` calls will be made, and parse functions
|
|
242
|
+
* will return zsv_status_cancelled
|
|
243
|
+
*/
|
|
244
|
+
ZSV_EXPORT
|
|
245
|
+
void zsv_abort(zsv_parser);
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
* @return number of bytes scanned from the last zsv_parse_more() invocation
|
|
249
|
+
*/
|
|
250
|
+
ZSV_EXPORT size_t zsv_scanned_length(zsv_parser);
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* @return cumulative number of bytes scanned across all requests by this parser
|
|
254
|
+
*/
|
|
255
|
+
ZSV_EXPORT size_t zsv_cum_scanned_length(zsv_parser parser);
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* @return number of raw bytes scanned from the beginning of the row
|
|
259
|
+
* to the end of this row. Subtract from zsv_cum_scanned_length() to get the
|
|
260
|
+
* position of the beginning of the row
|
|
261
|
+
*/
|
|
262
|
+
ZSV_EXPORT size_t zsv_row_length_raw_bytes(zsv_parser parser);
|
|
263
|
+
|
|
264
|
+
/**
|
|
265
|
+
* Check the quoted status of the last cell that was read. This function is only
|
|
266
|
+
* applicable when called from within a cell_handler() callback. Furthermore, this
|
|
267
|
+
function is generally only useful when the cell value will subsequently be
|
|
268
|
+
* output in CSV format
|
|
269
|
+
*
|
|
270
|
+
* @returns 0 if value will *not* need to be quoted when output as CSV, or
|
|
271
|
+
* 1 if it might need to be quoted
|
|
272
|
+
*/
|
|
273
|
+
ZSV_EXPORT
|
|
274
|
+
char zsv_quoted(zsv_parser parser);
|
|
275
|
+
|
|
276
|
+
/**
|
|
277
|
+
* Create a zsv_opts structure and return its handle
|
|
278
|
+
*
|
|
279
|
+
* This is only necessary in environments where structures cannot be directly
|
|
280
|
+
* instantiated such as web assembly. Otherwise, you should avoid this function
|
|
281
|
+
* and just create a `struct zsv_opts` on the stack
|
|
282
|
+
*
|
|
283
|
+
* Each argument to `zsv_opts_new()` corresponds to the same-named `struct zsv_opts` element
|
|
284
|
+
* See common.h for details
|
|
285
|
+
*/
|
|
286
|
+
ZSV_EXPORT struct zsv_opts *zsv_opts_new(void (*row_handler)(void *ctx),
|
|
287
|
+
void (*cell_handler)(void *ctx, unsigned char *utf8_value, size_t len),
|
|
288
|
+
void *ctx, zsv_generic_read read, void *stream, unsigned char *buff,
|
|
289
|
+
size_t buffsize, unsigned max_columns, unsigned max_row_size, char delimiter,
|
|
290
|
+
char no_quotes
|
|
291
|
+
#ifdef ZSV_EXTRAS
|
|
292
|
+
,
|
|
293
|
+
size_t max_rows
|
|
294
|
+
#endif
|
|
295
|
+
);
|
|
296
|
+
|
|
297
|
+
/**
|
|
298
|
+
* Destroy an option structure that was created by zsv_opts_new()
|
|
299
|
+
*/
|
|
300
|
+
ZSV_EXPORT void zsv_opts_delete(struct zsv_opts *);
|
|
301
|
+
|
|
302
|
+
/******************************************************************************
|
|
303
|
+
* Pull parsing functions
|
|
304
|
+
******************************************************************************/
|
|
305
|
+
|
|
306
|
+
/**
|
|
307
|
+
* To use pull parsing, do not use row or cell handlers, handler context
|
|
308
|
+
or zsv_parse_more(). Instead, call zsv_next_row() until a non-ok result is returned
|
|
309
|
+
* @param parser parser handle
|
|
310
|
+
* @return zsv_status_ok on success, other status code on error
|
|
311
|
+
*/
|
|
312
|
+
ZSV_EXPORT
|
|
313
|
+
enum zsv_status zsv_next_row(zsv_parser parser);
|
|
314
|
+
|
|
315
|
+
/******************************************************************************
|
|
316
|
+
* Miscellaneous functions used by the parser that may have standalone utility
|
|
317
|
+
******************************************************************************/
|
|
318
|
+
|
|
319
|
+
/**
|
|
320
|
+
* Force a string to conform to UTF8 encoding. Replaces any non-conforming utf8
|
|
321
|
+
* with the specified char, or removes from the string (and shortens the string)
|
|
322
|
+
* if replace = 0
|
|
323
|
+
* @param s input string. invalid UTF8 bytes will be overwritten
|
|
324
|
+
* @param n length (in bytes) of input
|
|
325
|
+
* @param replace the character to replace any malformed UTF8 bytes with, or 0
|
|
326
|
+
* to remove and shorten the result
|
|
327
|
+
* @param malformed_handler optional callback invoked upon scanning malformed UTF8
|
|
328
|
+
* @param handler_ctx context pointer passed to malformed_handler
|
|
329
|
+
* @return length of the valid string
|
|
330
|
+
*/
|
|
331
|
+
ZSV_EXPORT
|
|
332
|
+
size_t zsv_strencode(unsigned char *s, size_t n, unsigned char replace,
|
|
333
|
+
int (*malformed_handler)(void *, const unsigned char *s, size_t n, size_t offset),
|
|
334
|
+
void *handler_ctx);
|
|
335
|
+
|
|
336
|
+
#endif
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Tai Chi Minh Ralph Eastwood, Matt Wong and Guarnerix dba Liquidaty
|
|
3
|
+
* All rights reserved
|
|
4
|
+
*
|
|
5
|
+
* This file is part of zsv/lib, distributed under the license defined at
|
|
6
|
+
* https://opensource.org/licenses/MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#ifndef ZSV_COMMON_H
|
|
10
|
+
#define ZSV_COMMON_H
|
|
11
|
+
|
|
12
|
+
#ifdef __cplusplus
|
|
13
|
+
#define ZSV_BEGIN_DECL extern "C" {
|
|
14
|
+
#define ZSV_END_DECL }
|
|
15
|
+
#else
|
|
16
|
+
#define ZSV_BEGIN_DECL
|
|
17
|
+
#define ZSV_END_DECL /* empty */
|
|
18
|
+
#endif
|
|
19
|
+
|
|
20
|
+
enum zsv_status {
|
|
21
|
+
zsv_status_ok = 0,
|
|
22
|
+
zsv_status_cancelled,
|
|
23
|
+
zsv_status_no_more_input,
|
|
24
|
+
zsv_status_invalid_option,
|
|
25
|
+
zsv_status_memory,
|
|
26
|
+
zsv_status_error,
|
|
27
|
+
zsv_status_row,
|
|
28
|
+
zsv_status_done = 100
|
|
29
|
+
#ifdef ZSV_EXTRAS
|
|
30
|
+
,
|
|
31
|
+
zsv_status_max_rows_read = 999
|
|
32
|
+
#endif
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* `zsv_parser` is the type of a zsv parser handle
|
|
37
|
+
*/
|
|
38
|
+
typedef struct zsv_scanner *zsv_parser;
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Structure returned by `zsv_get_cell()` for fetching a parsed CSV cell value
|
|
42
|
+
*/
|
|
43
|
+
struct zsv_cell {
|
|
44
|
+
/**
|
|
45
|
+
* address of cell contents (not null-terminated)
|
|
46
|
+
*/
|
|
47
|
+
unsigned char *str;
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* length of cell contents
|
|
51
|
+
*/
|
|
52
|
+
size_t len;
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* bitfield values for `quoted` flags
|
|
56
|
+
*/
|
|
57
|
+
#define ZSV_PARSER_QUOTE_NONE 0 /* content does not need to be quoted */
|
|
58
|
+
#define ZSV_PARSER_QUOTE_UNCLOSED 1 /* only used internally by parser */
|
|
59
|
+
#define ZSV_PARSER_QUOTE_CLOSED 2 /* value was quoted */
|
|
60
|
+
#define ZSV_PARSER_QUOTE_NEEDED 4 /* value contains delimiter or dbl-quote */
|
|
61
|
+
#define ZSV_PARSER_QUOTE_EMBEDDED 8 /* value contains dbl-quote */
|
|
62
|
+
#define ZSV_PARSER_QUOTE_PENDING 16 /* only used internally by parser */
|
|
63
|
+
#define ZSV_PARSER_QUOTE_PENDING_LF 32 /* only used internally by parser */
|
|
64
|
+
/**
|
|
65
|
+
* quoted flags enable additional efficiency, in particular when input data will
|
|
66
|
+
* be output as text (csv, json etc), by indicating whether the cell contents may
|
|
67
|
+
* require special handling. For example, if the caller will output the cell value as
|
|
68
|
+
* CSV and quoted == 0, the caller need not scan the cell contents to check if
|
|
69
|
+
* quoting or escaping will be required
|
|
70
|
+
*/
|
|
71
|
+
char quoted;
|
|
72
|
+
unsigned char overwritten : 1;
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
/* Generic writer callback; same parameter shape as fwrite(), with the stream passed as void * */
typedef size_t (*zsv_generic_write)(const void *restrict, size_t, size_t, void *restrict);
|
|
76
|
+
/* Generic reader callback (type of zsv_opts.read); same parameter shape as fread(), the documented default.
 * NOTE(review): the names `n`, `size` are in the opposite order to fread()'s (size, nmemb) -- confirm */
typedef size_t (*zsv_generic_read)(void *restrict, size_t n, size_t size, void *restrict);
|
|
77
|
+
/* Generic seek callback (type of zsv_opts.seek); same parameter shape as fseek(), the documented default */
typedef int (*zsv_generic_seek)(void *, long, int);
|
|
78
|
+
|
|
79
|
+
/* fprintf() cast to `int (*)(void *, const char *, ...)`, the type of the `errprintf` member of struct zsv_opts.
 * Fully parenthesized and without a trailing `;` so it can be used anywhere an expression is allowed */
#define zsv_generic_fprintf ((int (*)(void *, const char *, ...))fprintf)
|
|
80
|
+
/* fclose() cast to `int (*)(void *)`, the type of the `errclose` member of struct zsv_opts.
 * Fully parenthesized and without a trailing `;` so it can be used anywhere an expression is allowed */
#define zsv_generic_fclose ((int (*)(void *))fclose)
|
|
81
|
+
|
|
82
|
+
#ifdef ZSV_EXTRAS
|
|
83
|
+
/**
|
|
84
|
+
* progress callback function signature
|
|
85
|
+
* @param ctx context pointer set in parser opts.progress.ctx
|
|
86
|
+
* @param cumulative_row_count number of input rows read so far
|
|
87
|
+
* @return zero to continue processing, non-zero to cancel parse
|
|
88
|
+
*/
|
|
89
|
+
typedef int (*zsv_progress_callback)(void *ctx, size_t cumulative_row_count);
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* completed callback function signature
|
|
93
|
+
* @param ctx context pointer set in parser opts.completed.ctx
|
|
94
|
+
* @param code exit code
|
|
95
|
+
*/
|
|
96
|
+
typedef void (*zsv_completed_callback)(void *ctx, int code);
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Data can be "overwritten" on-the-fly by providing custom callbacks
|
|
100
|
+
* data from the calling code is passed to the zsv library
|
|
101
|
+
* via the `zsv_overwrite_data` structure
|
|
102
|
+
*/
|
|
103
|
+
struct zsv_overwrite_data {
|
|
104
|
+
size_t row_ix; // 0-based
|
|
105
|
+
size_t col_ix; // 0-based
|
|
106
|
+
size_t timestamp;
|
|
107
|
+
struct zsv_cell val;
|
|
108
|
+
struct zsv_cell author;
|
|
109
|
+
struct zsv_cell old_value;
|
|
110
|
+
char have; // 1 = we have unprocessed overwrites
|
|
111
|
+
};
|
|
112
|
+
|
|
113
|
+
struct zsv_opt_overwrite {
|
|
114
|
+
void *ctx;
|
|
115
|
+
enum zsv_status (*open)(void *ctx);
|
|
116
|
+
enum zsv_status (*next)(void *ctx, struct zsv_overwrite_data *odata);
|
|
117
|
+
enum zsv_status (*close)(void *ctx);
|
|
118
|
+
char cancel; // explicitly cancel application of overwrites
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
#endif
|
|
122
|
+
|
|
123
|
+
struct zsv_opts {
|
|
124
|
+
/**
|
|
125
|
+
* Callback that is called for each row that is parsed. In most use cases,
|
|
126
|
+
* this is where most of the code logic resides
|
|
127
|
+
*/
|
|
128
|
+
void (*row_handler)(void *ctx);
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Callback that is called immediately after a cell is parsed.
|
|
132
|
+
* The most common usage pattern is to omit the cell handler, and just loop
|
|
133
|
+
* through each cell in the `row_handler`. But if you prefer, you can use
|
|
134
|
+
* only a cell handler, or both a cell handler and a row handler.
|
|
135
|
+
*/
|
|
136
|
+
void (*cell_handler)(void *ctx, unsigned char *utf8_value, size_t len);
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* If a row was too long to fit in the allocated memory, then
|
|
140
|
+
* if `overflow_row_handler` is set, it will be called after the remaining
|
|
141
|
+
* portion of the row is parsed. For example, if a row consists of
|
|
142
|
+
* XXXXX,YYYYY and there is only enough memory to hold XXXX
|
|
143
|
+
* then `row_handler()` will be called after parsing `XXXX,` and
|
|
144
|
+
* `overflow_row_handler()` called twice, after parsing `X,YY` and `YYY`,
|
|
145
|
+
* before the subsequent row is parsed
|
|
146
|
+
*
|
|
147
|
+
* Note: we considered adding a callback that would be called before the initial
|
|
148
|
+
* `row_handler()` call, in the event that a subsequent `overflow_row_handler()`
|
|
149
|
+
* call was anticipated. Because this scenario occurs so infrequently, we
|
|
150
|
+
* decided to keep it simple with a single callback. But we may reconsider
|
|
151
|
+
* if there is demand for that (or another) alternative approach
|
|
152
|
+
*/
|
|
153
|
+
void (*overflow_row_handler)(void *ctx);
|
|
154
|
+
|
|
155
|
+
/**
|
|
156
|
+
* The context that is passed to each of our handlers
|
|
157
|
+
*/
|
|
158
|
+
void *ctx;
|
|
159
|
+
|
|
160
|
+
/**
|
|
161
|
+
* Caller can specify its own read function for fetching data to be parsed
|
|
162
|
+
* If not specified, the default value is `fread()`
|
|
163
|
+
*/
|
|
164
|
+
zsv_generic_read read;
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* Caller can specify its own seek function for setting the file position
|
|
168
|
+
* with zsv_index_seek. If not specified, the default value is `fseek()`
|
|
169
|
+
*/
|
|
170
|
+
zsv_generic_seek seek;
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* Caller can specify its own stream that is passed to the read function
|
|
174
|
+
* If not specified, the default value is stdin
|
|
175
|
+
*/
|
|
176
|
+
void *stream;
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Caller can specify its own buffer for the parser to use of at least
|
|
180
|
+
* ZSV_MIN_SCANNER_BUFFSIZE (4096) in size. If not provided, an internal
|
|
181
|
+
* buffer is allocated
|
|
182
|
+
*/
|
|
183
|
+
unsigned char *buff;
|
|
184
|
+
|
|
185
|
+
/**
|
|
186
|
+
* If caller specifies its own buffer, this should be its size
|
|
187
|
+
* Otherwise, this can be specified as the size of the internal buffer that
|
|
188
|
+
* will be created, subject to increase if/as appropriate if max_row_size
|
|
189
|
+
* is specified. Defaults to 256k
|
|
190
|
+
*
|
|
191
|
+
* cli option: -B,--buff-size
|
|
192
|
+
*/
|
|
193
|
+
size_t buffsize;
|
|
194
|
+
|
|
195
|
+
/**
|
|
196
|
+
* Maximum number of columns to parse. defaults to 1024
|
|
197
|
+
*
|
|
198
|
+
* cli option: -c,--max-column-count
|
|
199
|
+
*/
|
|
200
|
+
unsigned max_columns;
|
|
201
|
+
|
|
202
|
+
/**
|
|
203
|
+
* maximum row size can be used as an alternative way to specify the
|
|
204
|
+
* internal buffer size, which will be at least as large as max_row_size * 2
|
|
205
|
+
*
|
|
206
|
+
* cli option: -r,--max-row-size
|
|
207
|
+
*/
|
|
208
|
+
unsigned max_row_size;
|
|
209
|
+
|
|
210
|
+
/**
|
|
211
|
+
* delimiter: typically a comma or tab
|
|
212
|
+
* can be any char other than newline, form feed or quote
|
|
213
|
+
* defaults to comma
|
|
214
|
+
*
|
|
215
|
+
* cli option: -t,--tab-delim or -O,--other-delim <delim>
|
|
216
|
+
*/
|
|
217
|
+
char delimiter;
|
|
218
|
+
|
|
219
|
+
/**
|
|
220
|
+
* no_quotes: if > 0, this flag indicates that the parser should treat double-quotes
|
|
221
|
+
* just like any ordinary character
|
|
222
|
+
* defaults to 0
|
|
223
|
+
*
|
|
224
|
+
* cli option: -q,--no-quote
|
|
225
|
+
*/
|
|
226
|
+
char no_quotes;
|
|
227
|
+
|
|
228
|
+
#ifndef ZSV_NO_ONLY_CRLF
|
|
229
|
+
/**
|
|
230
|
+
* only_crlf_rowend: if non-zero, *only* accept CRLF as row end
|
|
231
|
+
*
|
|
232
|
+
* cli option: --only-crlf
|
|
233
|
+
*/
|
|
234
|
+
char only_crlf_rowend;
|
|
235
|
+
#endif
|
|
236
|
+
/**
|
|
237
|
+
* flag to print more verbose messages to the console
|
|
238
|
+
* cli option: -v,--verbose
|
|
239
|
+
*/
|
|
240
|
+
char verbose;
|
|
241
|
+
|
|
242
|
+
/**
|
|
243
|
+
* if the actual data does not have a header row with column names, the caller
|
|
244
|
+
* should provide one (in CSV format) which will be treated as if it was the
|
|
245
|
+
* first row of data
|
|
246
|
+
*
|
|
247
|
+
* cli option: -0,--header-row
|
|
248
|
+
*/
|
|
249
|
+
const char *insert_header_row;
|
|
250
|
+
|
|
251
|
+
/**
|
|
252
|
+
* number of rows that the header row spans. If 0 or 1, header is assumed to span 1 row
|
|
253
|
+
* otherwise, set to number > 1 to span multiple rows
|
|
254
|
+
*/
|
|
255
|
+
unsigned int header_span;
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* number of rows to ignore before the initial row is processed
|
|
259
|
+
*/
|
|
260
|
+
unsigned int rows_to_ignore;
|
|
261
|
+
|
|
262
|
+
/**
|
|
263
|
+
* by default, zsv ignores empty header rows; the number of ignored rows
|
|
264
|
+
* can be fetched via `zsv_get_empty_header_rows()`. To disable this
|
|
265
|
+
* behavior, set `keep_empty_header_rows` to 1
|
|
266
|
+
*/
|
|
267
|
+
unsigned char keep_empty_header_rows;
|
|
268
|
+
|
|
269
|
+
/**
|
|
270
|
+
* by default, zsv ignores malformed UTF8; set malformed_utf8_replace to
|
|
271
|
+
* a value between 1 and 127 to replace malformed UTF8 with that single
|
|
272
|
+
* char, to ZSV_MALFORMED_UTF8_REMOVE to remove
|
|
273
|
+
* or ZSV_MALFORMED_UTF8_DO_NOT_REPLACE to explicitly leave untouched
|
|
274
|
+
*/
|
|
275
|
+
#define ZSV_MALFORMED_UTF8_DO_NOT_REPLACE (-2) /* explicitly leave malformed UTF8 untouched */
|
|
276
|
+
#define ZSV_MALFORMED_UTF8_REMOVE (-1) /* remove malformed UTF8 bytes */
|
|
277
|
+
/* signed char: must hold the negative ZSV_MALFORMED_UTF8_* sentinels; plain char may be unsigned */
signed char malformed_utf8_replace;
|
|
278
|
+
|
|
279
|
+
/**
|
|
280
|
+
* `option_overrides` is a bitfield that indicates what ZSV options, if any, were
|
|
281
|
+
* specifically set in the command invocation and is used to ensure
|
|
282
|
+
* that option values set in the command invocation take priority over
|
|
283
|
+
* default values, or values saved in related property values such as
|
|
284
|
+
* .zsv/data/<filename>/props.json
|
|
285
|
+
*
|
|
286
|
+
* For example, if a file has a saved header row span of 2, but the
|
|
287
|
+
* command-line arguments explicitly included `--header-row-span 3`,
|
|
288
|
+
* then setting header_span to 3 and setting option_overrides.header_row_span
|
|
289
|
+
* ensures that the value of 3 is used
|
|
290
|
+
*/
|
|
291
|
+
struct {
|
|
292
|
+
unsigned char header_row_span : 1;
|
|
293
|
+
unsigned char skip_head : 1;
|
|
294
|
+
unsigned char max_column_count : 1;
|
|
295
|
+
unsigned char malformed_utf8_replacement : 1;
|
|
296
|
+
unsigned char _ : 4;
|
|
297
|
+
} option_overrides;
|
|
298
|
+
|
|
299
|
+
int (*errprintf)(void *ctx, const char *format, ...);
|
|
300
|
+
void *errf;
|
|
301
|
+
int (*errclose)(void *ctx);
|
|
302
|
+
|
|
303
|
+
#ifdef ZSV_EXTRAS
|
|
304
|
+
struct {
|
|
305
|
+
/**
|
|
306
|
+
* min number of rows between progress callback calls
|
|
307
|
+
*/
|
|
308
|
+
size_t rows_interval;
|
|
309
|
+
|
|
310
|
+
/**
|
|
311
|
+
* min number of seconds between callback calls
|
|
312
|
+
*/
|
|
313
|
+
unsigned int seconds_interval;
|
|
314
|
+
|
|
315
|
+
/**
|
|
316
|
+
* Progress callback, called periodically to provide progress updates
|
|
317
|
+
*/
|
|
318
|
+
zsv_progress_callback callback;
|
|
319
|
+
|
|
320
|
+
/**
|
|
321
|
+
* Context passed to the callback, when the callback is invoked
|
|
322
|
+
*/
|
|
323
|
+
void *ctx;
|
|
324
|
+
} progress;
|
|
325
|
+
|
|
326
|
+
struct {
|
|
327
|
+
/**
|
|
328
|
+
* Optional callback. If set, it is called by zsv_finish()
|
|
329
|
+
*/
|
|
330
|
+
zsv_completed_callback callback;
|
|
331
|
+
/**
|
|
332
|
+
* Context passed to the callback, when the callback is invoked
|
|
333
|
+
*/
|
|
334
|
+
void *ctx;
|
|
335
|
+
} completed;
|
|
336
|
+
|
|
337
|
+
/**
|
|
338
|
+
* maximum number of rows to parse (including any header rows)
|
|
339
|
+
*/
|
|
340
|
+
size_t max_rows;
|
|
341
|
+
|
|
342
|
+
/**
|
|
343
|
+
* If non-zero, automatically apply overwrites located in
|
|
344
|
+
* /path/to/.zsv/data/my-data.csv/overwrite.sqlite3 for a given
|
|
345
|
+
* input /path/to/my-data.csv
|
|
346
|
+
*
|
|
347
|
+
* This flag is only used by zsv_new_with_properties()
|
|
348
|
+
* if using zsv_new(), this flag is ignored (use the `overwrite` structure instead)
|
|
349
|
+
*/
|
|
350
|
+
char overwrite_auto;
|
|
351
|
+
|
|
352
|
+
/**
|
|
353
|
+
* Optional cell-level values that overwrite data returned to the caller by the API
|
|
354
|
+
* Use when not using overwrite_auto together with zsv_new_with_properties()
|
|
355
|
+
*/
|
|
356
|
+
struct zsv_opt_overwrite overwrite;
|
|
357
|
+
|
|
358
|
+
#endif /* ZSV_EXTRAS */
|
|
359
|
+
};
|
|
360
|
+
|
|
361
|
+
#endif
|