zsv 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +36 -0
- data/LICENSE +21 -0
- data/README.md +311 -0
- data/ext/zsv/common.h +34 -0
- data/ext/zsv/extconf.rb +137 -0
- data/ext/zsv/options.c +126 -0
- data/ext/zsv/options.h +31 -0
- data/ext/zsv/options_internal.h +8 -0
- data/ext/zsv/parser.c +300 -0
- data/ext/zsv/parser.h +62 -0
- data/ext/zsv/row.c +122 -0
- data/ext/zsv/row.h +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
- data/ext/zsv/zsv_ext.c +343 -0
- data/lib/zsv/version.rb +5 -0
- data/lib/zsv.rb +81 -0
- metadata +340 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Liquidaty and the zsv/lib contributors
|
|
3
|
+
* All rights reserved
|
|
4
|
+
*
|
|
5
|
+
* This file is part of zsv/lib, distributed under the license defined at
|
|
6
|
+
* https://opensource.org/licenses/MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#include <stdio.h>
|
|
10
|
+
|
|
11
|
+
static int main_help(int argc, const char *argv[]) {
|
|
12
|
+
(void)(argc);
|
|
13
|
+
(void)(argv);
|
|
14
|
+
|
|
15
|
+
FILE *f = stdout;
|
|
16
|
+
static const char *usage[] = {
|
|
17
|
+
"zsv: streaming csv processor",
|
|
18
|
+
"",
|
|
19
|
+
"Usage:",
|
|
20
|
+
" zsv version: display version info (and if applicable, extension info)",
|
|
21
|
+
#ifndef __EMSCRIPTEN__
|
|
22
|
+
" zsv (un)register [<extension_id>] : (un)register an extension",
|
|
23
|
+
" Registration info is saved in zsv.ini located in a directory determined as:",
|
|
24
|
+
" ZSV_CONFIG_DIR environment variable value, if set",
|
|
25
|
+
#if defined(_WIN32)
|
|
26
|
+
" LOCALAPPDATA environment variable value, if set",
|
|
27
|
+
" otherwise, C:\\temp",
|
|
28
|
+
#else
|
|
29
|
+
" otherwise, " PREFIX "/etc",
|
|
30
|
+
#endif
|
|
31
|
+
#endif
|
|
32
|
+
" zsv help [<command>]",
|
|
33
|
+
" zsv <command> <options> <arguments> : run a command on data (see below for details)",
|
|
34
|
+
#ifndef __EMSCRIPTEN__
|
|
35
|
+
" zsv <id>-<cmd> <options> <arguments> : invoke command 'cmd' of extension 'id'",
|
|
36
|
+
" zsv license [<extension_id>]",
|
|
37
|
+
#endif
|
|
38
|
+
" zsv thirdparty : view third-party licenses & acknowledgements",
|
|
39
|
+
"",
|
|
40
|
+
"Options common to all commands except `prop`, `rm` and `jq`:",
|
|
41
|
+
#ifdef ZSV_EXTRAS
|
|
42
|
+
" -L,--limit-rows <n> : limit processing to the given number of rows (including any header row(s))",
|
|
43
|
+
#endif
|
|
44
|
+
" -c,--max-column-count <n>: set the maximum number of columns parsed per row. defaults to 1024",
|
|
45
|
+
" -r,--max-row-size <n> : set the minimum supported maximum row size. defaults to 64k",
|
|
46
|
+
" -B,--buff-size <n> : set internal buffer size. defaults to 256k",
|
|
47
|
+
" -t,--tab-delim : set column delimiter to tab",
|
|
48
|
+
" -O,--other-delim <char> : set column delimiter to specified character",
|
|
49
|
+
" -q,--no-quote : turn off quote handling",
|
|
50
|
+
" -R,--skip-head <n> : skip specified number of initial rows",
|
|
51
|
+
" -d,--header-row-span <n> : apply header depth (rowspan) of n",
|
|
52
|
+
" -u,--malformed-utf8-replacement <string>: replacement string (can be empty) in case of malformed UTF8 input",
|
|
53
|
+
" (default for \"desc\" command is '?')",
|
|
54
|
+
" -S,--keep-blank-headers : disable default behavior of ignoring leading blank rows",
|
|
55
|
+
" -0,--header-row <header> : insert the provided CSV as the first row (in position 0)",
|
|
56
|
+
" e.g. --header-row 'col1,col2,\"my col 3\"'",
|
|
57
|
+
#ifndef ZSV_NO_ONLY_CRLF
|
|
58
|
+
" --only-crlf : only treat CRLF as row delimiter",
|
|
59
|
+
" CR or LF alone are treated as normal chars that do not require quotes",
|
|
60
|
+
#endif
|
|
61
|
+
#ifdef ZSV_EXTRAS
|
|
62
|
+
" -1,--apply-overwrites : automatically apply overwrites saved via `overwrite` command",
|
|
63
|
+
#endif
|
|
64
|
+
" -v,--verbose : verbose output",
|
|
65
|
+
"",
|
|
66
|
+
"Commands that parse CSV or other tabular data:",
|
|
67
|
+
" echo : write tabular input to stdout with optional cell overwrites",
|
|
68
|
+
" check : check for anolomolies (column counts, utf8 encoding etc)",
|
|
69
|
+
" count : print the number of rows",
|
|
70
|
+
" select : extract rows/columns by name or position and perform other basic and 'cleanup' operations",
|
|
71
|
+
" desc : describe each column",
|
|
72
|
+
" sql : run ad-hoc SQL on one or more CSV files",
|
|
73
|
+
" pretty : pretty print for console display",
|
|
74
|
+
" serialize: convert into 3-column format (id, column name, cell value)",
|
|
75
|
+
" flatten : flatten a table consisting of N groups of data, each with 1 or",
|
|
76
|
+
" more rows in the table, into a table of N rows",
|
|
77
|
+
" 2json : convert CSV or sqlite3 db table to json",
|
|
78
|
+
" 2tsv : convert to tab-delimited text",
|
|
79
|
+
" stack : stack tables vertically, aligning columns with common names",
|
|
80
|
+
" paste : horizontally paste two tables together: given inputs X, Y, ... of N rows",
|
|
81
|
+
" compare : compare two or more tables and output differences",
|
|
82
|
+
" overwrite: save, modify or apply overwrites",
|
|
83
|
+
"",
|
|
84
|
+
"Other commands:",
|
|
85
|
+
" 2db : convert json to sqlite3 db",
|
|
86
|
+
" prop : save parsing options associated with a file that are subsequently",
|
|
87
|
+
" applied by default when processing that file",
|
|
88
|
+
" rm : remove a file and its related cache",
|
|
89
|
+
" mv : rename (move) a file and/or its related cache",
|
|
90
|
+
#ifdef USE_JQ
|
|
91
|
+
" jq : run a jq filter on json input",
|
|
92
|
+
#endif
|
|
93
|
+
NULL,
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
for (size_t i = 0; usage[i]; i++)
|
|
97
|
+
fprintf(f, "%s\n", usage[i]);
|
|
98
|
+
|
|
99
|
+
#ifndef __EMSCRIPTEN__
|
|
100
|
+
char printed_init = 0;
|
|
101
|
+
struct cli_config config;
|
|
102
|
+
if (!config_init(&config, 1, 1, 0)) {
|
|
103
|
+
for (struct zsv_ext *ext = config.extensions; ext; ext = ext->next) {
|
|
104
|
+
if (ext->inited == zsv_init_ok) {
|
|
105
|
+
if (!printed_init) {
|
|
106
|
+
printed_init = 1;
|
|
107
|
+
fprintf(f, "\nExtended commands:\n");
|
|
108
|
+
} else
|
|
109
|
+
fprintf(f, "\n");
|
|
110
|
+
if (ext->help)
|
|
111
|
+
fprintf(f, " Extension '%s': %s\n", ext->id, ext->help);
|
|
112
|
+
for (struct zsv_ext_command *cmd = ext->commands; cmd; cmd = cmd->next)
|
|
113
|
+
fprintf(f, " %s-%s%s%s\n", ext->id, cmd->id, cmd->help ? ": " : "", cmd->help ? cmd->help : "");
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
config_free(&config);
|
|
117
|
+
}
|
|
118
|
+
if (!printed_init)
|
|
119
|
+
fprintf(f, "\n(No extended commands)\n");
|
|
120
|
+
#endif
|
|
121
|
+
|
|
122
|
+
return 0;
|
|
123
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Liquidaty and the zsv/lib contributors
|
|
3
|
+
* All rights reserved
|
|
4
|
+
*
|
|
5
|
+
* This file is part of zsv/lib, distributed under the license defined at
|
|
6
|
+
* https://opensource.org/licenses/MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
static int main_license(int argc, const char *argv[]) {
|
|
10
|
+
(void)(argc);
|
|
11
|
+
(void)(argv);
|
|
12
|
+
|
|
13
|
+
fprintf(stderr, "Note: for third-party licenses & acknowledgements, run `zsv thirdparty`\n");
|
|
14
|
+
printf("\n====================================================\n");
|
|
15
|
+
printf("ZSV/lib license");
|
|
16
|
+
printf(" ");
|
|
17
|
+
printf("\n====================================================\n");
|
|
18
|
+
|
|
19
|
+
fwrite(zsv_license_text_MIT, 1, strlen(zsv_license_text_MIT), stdout);
|
|
20
|
+
|
|
21
|
+
struct cli_config config;
|
|
22
|
+
if (!config_init(&config, 0, 1, 0)) {
|
|
23
|
+
for (struct zsv_ext *ext = config.extensions; ext; ext = ext->next) {
|
|
24
|
+
printf("\n====================================================\n");
|
|
25
|
+
printf("License for extension '%s'", ext->id);
|
|
26
|
+
printf("\n====================================================\n");
|
|
27
|
+
if (ext->license && *ext->license) {
|
|
28
|
+
size_t len = strlen(ext->license);
|
|
29
|
+
fwrite(ext->license, 1, len, stdout);
|
|
30
|
+
if (ext->license[len - 1] != '\n')
|
|
31
|
+
printf("\n");
|
|
32
|
+
} else
|
|
33
|
+
printf("Unknown\n");
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
config_free(&config);
|
|
37
|
+
|
|
38
|
+
return 0;
|
|
39
|
+
}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Liquidaty and the zsv/lib contributors
|
|
3
|
+
* All rights reserved
|
|
4
|
+
*
|
|
5
|
+
* This file is part of zsv/lib, distributed under the license defined at
|
|
6
|
+
* https://opensource.org/licenses/MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Check an extension's implementation for completeness and print any errors or warnings
|
|
11
|
+
* This function does not yet impose any requirements on any extension
|
|
12
|
+
*
|
|
13
|
+
* @param ext the extension to check
|
|
14
|
+
* @return error
|
|
15
|
+
*/
|
|
16
|
+
static int check_extension(struct zsv_ext *ext) {
|
|
17
|
+
/* to do:
|
|
18
|
+
* check that each extension command supports `zsv my-cmd --help`;
|
|
19
|
+
* redirect, examine & restore stdin/out/err
|
|
20
|
+
*/
|
|
21
|
+
if (!ext)
|
|
22
|
+
return 1;
|
|
23
|
+
|
|
24
|
+
ext_init(ext);
|
|
25
|
+
if (ext->inited != zsv_init_ok)
|
|
26
|
+
return 1;
|
|
27
|
+
return 0;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/**
 * Print the usage text of `zsv register` or `zsv unregister` to stdout.
 *
 * @param do_register non-zero selects the `register` text, zero the `unregister` text
 * @return 0, always
 */
static int register_help(char do_register) {
  /* index 0: unregister, index 1: register */
  static const char *const usage_text[2] = {
    "zsv unregister: unregister an extension\n\n"
    "usage: zsv unregister [-g,--global] <extension_id>",
    "zsv register: register an extension\n\n"
    "usage: zsv register [-g,--global] <extension_id>",
  };
  puts(usage_text[do_register ? 1 : 0]);
  return 0;
}
|
|
38
|
+
|
|
39
|
+
static int main_register_aux(int argc, const char *argv[]) {
|
|
40
|
+
int err = 0;
|
|
41
|
+
struct cli_config config;
|
|
42
|
+
char do_register = *argv[0] == 'r';
|
|
43
|
+
const char *extension_id = NULL;
|
|
44
|
+
char global = 0;
|
|
45
|
+
for (int i = 1; i < argc; i++) {
|
|
46
|
+
const char *arg = argv[i];
|
|
47
|
+
if (!strcmp(arg, "--help") || !strcmp(arg, "-h"))
|
|
48
|
+
return register_help(do_register);
|
|
49
|
+
if (!strcmp(arg, "-g") || !strcmp(arg, "--global"))
|
|
50
|
+
global = 1;
|
|
51
|
+
else {
|
|
52
|
+
if (extension_id != NULL)
|
|
53
|
+
return register_help(do_register);
|
|
54
|
+
extension_id = arg;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
if (!extension_id)
|
|
59
|
+
fprintf(stderr, "No extension id provided\n"), err = 1;
|
|
60
|
+
else if (strlen(extension_id) < ZSV_EXTENSION_ID_MIN_LEN || strlen(extension_id) > ZSV_EXTENSION_ID_MAX_LEN)
|
|
61
|
+
fprintf(stderr, "Extension id must be 1 to 8 bytes\n"), err = 1;
|
|
62
|
+
else if (config_init_2(&config, !do_register, 1, 1, global, !global))
|
|
63
|
+
config_free(&config); // unable to init config
|
|
64
|
+
else {
|
|
65
|
+
struct zsv_ext *found;
|
|
66
|
+
if ((found = find_extension(&config, extension_id))) {
|
|
67
|
+
// found this extension registered
|
|
68
|
+
if (do_register) {
|
|
69
|
+
fprintf(stderr, "Extension %s already registered\n", extension_id), err = 1;
|
|
70
|
+
check_extension(found);
|
|
71
|
+
}
|
|
72
|
+
// unregister
|
|
73
|
+
else if (!(err = remove_extension(&config.extensions, found)))
|
|
74
|
+
if (!(err = config_save(&config)))
|
|
75
|
+
fprintf(stderr, "Extension %s unregistered\n", extension_id);
|
|
76
|
+
} else {
|
|
77
|
+
// this extension has not been registered
|
|
78
|
+
if (!do_register)
|
|
79
|
+
fprintf(stderr, "Extension %s was not already registered\n", extension_id), err = 1;
|
|
80
|
+
|
|
81
|
+
// register
|
|
82
|
+
else {
|
|
83
|
+
// confirm we can successfully load dll, then register
|
|
84
|
+
// add_extension() adds this extension to the front of the list
|
|
85
|
+
if (!(err = add_extension(extension_id, &config.extensions, 1, 1))) {
|
|
86
|
+
if (!check_extension(config.extensions)) {
|
|
87
|
+
if (!(err = config_save(&config)))
|
|
88
|
+
fprintf(stderr, "Extension %s registered\n", extension_id);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
config_free(&config);
|
|
94
|
+
}
|
|
95
|
+
return err;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
/**
 * Entry point of `zsv register`; all work is done by main_register_aux(),
 * which derives the mode from argv[0].
 *
 * @param argc number of entries in argv
 * @param argv argument vector, forwarded unchanged
 * @return the status returned by main_register_aux()
 */
static int main_register(int argc, const char *argv[]) {
  const int status = main_register_aux(argc, argv);
  return status;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Entry point of `zsv unregister`; all work is done by main_register_aux(),
 * which derives the mode from argv[0].
 *
 * @param argc number of entries in argv
 * @param argv argument vector, forwarded unchanged
 * @return the status returned by main_register_aux()
 */
static int main_unregister(int argc, const char *argv[]) {
  const int status = main_register_aux(argc, argv);
  return status;
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Liquidaty and the zsv/lib contributors
|
|
3
|
+
* All rights reserved
|
|
4
|
+
*
|
|
5
|
+
* This file is part of zsv/lib, distributed under the license defined at
|
|
6
|
+
* https://opensource.org/licenses/MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
 * Print a framed heading followed by every string of a NULL-terminated array.
 *
 * @param name  first part of the heading; must not be NULL
 * @param name2 second part of the heading; "?" is printed when NULL
 * @param ss    NULL-terminated array of strings written verbatim to stdout;
 *              when NULL, "No third-party information" is printed instead
 */
static void print_str_array(const char *name, const char *name2, const char **ss) {
  const char *suffix = name2 ? name2 : "?";
  printf("\n\n==========================\n%s%s\n==========================\n", name, suffix);
  if (!ss) {
    printf("No third-party information\n");
    return;
  }
  for (const char **entry = ss; *entry; entry++)
    fputs(*entry, stdout);
}
|
|
18
|
+
|
|
19
|
+
static int main_thirdparty(int argc, const char *argv[]) {
|
|
20
|
+
(void)(argc);
|
|
21
|
+
(void)(argv);
|
|
22
|
+
|
|
23
|
+
printf("Third-party licenses and acknowledgements");
|
|
24
|
+
print_str_array("ZSV/lib third-party dependencies", "", zsv_thirdparty);
|
|
25
|
+
|
|
26
|
+
#ifndef __EMSCRIPTEN__
|
|
27
|
+
struct cli_config config;
|
|
28
|
+
const char *ss[2];
|
|
29
|
+
ss[1] = NULL;
|
|
30
|
+
if (!config_init(&config, 0, 1, 0)) {
|
|
31
|
+
for (struct zsv_ext *ext = config.extensions; ext; ext = ext->next) {
|
|
32
|
+
if (ext->thirdparty) {
|
|
33
|
+
ss[0] = ext->thirdparty;
|
|
34
|
+
print_str_array("Extension: ", (const char *)ext->id, ss);
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
#endif
|
|
39
|
+
printf("\n");
|
|
40
|
+
return 0;
|
|
41
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/* intentionally left blank. see register.c */
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Liquidaty and the zsv/lib contributors
|
|
3
|
+
* All rights reserved
|
|
4
|
+
*
|
|
5
|
+
* This file is part of zsv/lib, distributed under the license defined at
|
|
6
|
+
* https://opensource.org/licenses/MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
static int main_version(int argc, const char *argv[]) {
|
|
10
|
+
(void)(argc);
|
|
11
|
+
(void)(argv);
|
|
12
|
+
printf("zsv version %s (lib %s)\n", VERSION, zsv_lib_version());
|
|
13
|
+
return 0;
|
|
14
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/* simdutf_wrapper.h - C-callable wrapper around simdutf */
|
|
2
|
+
|
|
3
|
+
#ifndef SIMDUTF_WRAPPER_H
|
|
4
|
+
#define SIMDUTF_WRAPPER_H
|
|
5
|
+
|
|
6
|
+
#include <stddef.h> /* for size_t */
|
|
7
|
+
|
|
8
|
+
#ifdef __cplusplus
|
|
9
|
+
extern "C" {
|
|
10
|
+
#endif
|
|
11
|
+
|
|
12
|
+
/* Returns 1 if valid UTF-8, 0 otherwise */
|
|
13
|
+
int simdutf_is_valid_utf8(const char *buf, size_t len);
|
|
14
|
+
|
|
15
|
+
#ifdef __cplusplus
|
|
16
|
+
}
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
#endif /* SIMDUTF_WRAPPER_H */
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
#include <stddef.h>
|
|
2
|
+
#include <stdint.h>
|
|
3
|
+
#include <stdbool.h>
|
|
4
|
+
|
|
5
|
+
bool utf8_is_valid(const uint8_t *s, size_t len);
|
|
6
|
+
|
|
7
|
+
/////
|
|
8
|
+
/* Return true if `b` is a UTF-8 continuation byte, i.e. has the bit
 * pattern 10xxxxxx (0x80..0xBF). */
static inline bool is_cont(uint8_t b) {
  // the two most significant bits must be exactly "10"
  return (b >> 6) == 0x2;
}
|
|
12
|
+
|
|
13
|
+
/*
 * Check whether a byte buffer is well-formed UTF-8.
 *
 * Rejects stray continuation bytes, overlong encodings, UTF-16 surrogates
 * (U+D800..U+DFFF), code points above U+10FFFF and truncated sequences.
 *
 * s:   bytes to check; only read when len > 0
 * len: number of bytes in s
 * returns true if all of s[0..len) is valid UTF-8 (an empty buffer is valid)
 */
bool utf8_is_valid(const uint8_t *s, size_t len) {
  size_t pos = 0;

  while (pos < len) {
    const uint8_t lead = s[pos];

    // ASCII: single byte
    if (lead < 0x80) {
      pos++;
      continue;
    }

    // 0x80-0xBF: continuation byte in lead position
    // 0xC0-0xC1: overlong 2-byte forms
    // 0xF5-0xFF: never valid in UTF-8
    if (lead < 0xC2 || lead > 0xF4) {
      return false;
    }

    // Number of bytes following the lead byte, and the range permitted for
    // the first of them. That range is narrower than 80-BF for four lead bytes:
    //   E0 A0-BF  (no overlong 3-byte forms)
    //   ED 80-9F  (no surrogates D800-DFFF)
    //   F0 90-BF  (no overlong 4-byte forms)
    //   F4 80-8F  (nothing above U+10FFFF)
    size_t trail;
    uint8_t lo = 0x80;
    uint8_t hi = 0xBF;
    if (lead < 0xE0) {
      trail = 1;
    } else if (lead < 0xF0) {
      trail = 2;
      if (lead == 0xE0) {
        lo = 0xA0;
      } else if (lead == 0xED) {
        hi = 0x9F;
      }
    } else {
      trail = 3;
      if (lead == 0xF0) {
        lo = 0x90;
      } else if (lead == 0xF4) {
        hi = 0x8F;
      }
    }

    // truncated sequence: fewer than `trail` bytes remain after the lead byte
    if (len - pos <= trail) {
      return false;
    }

    const uint8_t second = s[pos + 1];
    if (second < lo || second > hi) {
      return false;
    }

    // any remaining bytes must be plain continuation bytes (10xxxxxx)
    for (size_t k = 2; k <= trail; k++) {
      if ((s[pos + k] & 0xC0) != 0x80) {
        return false;
      }
    }

    pos += trail + 1;
  }

  return true;
}
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2025 Liquidaty and zsv contributors. All rights reserved.
|
|
3
|
+
*
|
|
4
|
+
* This file is part of zsv/lib, distributed under the MIT license as defined at
|
|
5
|
+
* https://opensource.org/licenses/MIT
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <stdio.h>
|
|
9
|
+
#include <errno.h>
|
|
10
|
+
|
|
11
|
+
#define ZSV_COMMAND check
|
|
12
|
+
#include "zsv_command.h"
|
|
13
|
+
|
|
14
|
+
#define _GNU_SOURCE 1
|
|
15
|
+
#include <string.h>
|
|
16
|
+
#include <zsv/utils/arg.h>
|
|
17
|
+
#include <zsv/utils/err.h>
|
|
18
|
+
|
|
19
|
+
#ifdef USE_SIMDUTF
|
|
20
|
+
#include "check/simdutf_wrapper.h"
|
|
21
|
+
#define USAGE_APPNAME APPNAME " (simdutf8)"
|
|
22
|
+
#else
|
|
23
|
+
#include "check/utf8.c"
|
|
24
|
+
#define USAGE_APPNAME APPNAME
|
|
25
|
+
#endif
|
|
26
|
+
|
|
27
|
+
struct zsv_check_data {
|
|
28
|
+
FILE *in;
|
|
29
|
+
FILE *out;
|
|
30
|
+
const char *input_path;
|
|
31
|
+
zsv_parser parser;
|
|
32
|
+
size_t row_ix;
|
|
33
|
+
size_t column_count;
|
|
34
|
+
int err;
|
|
35
|
+
unsigned char display_row : 1;
|
|
36
|
+
unsigned char check_utf8 : 1;
|
|
37
|
+
unsigned char _ : 6;
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
#ifdef USE_SIMDUTF
|
|
41
|
+
#define UTF8VALIDATOR simdutf_is_valid_utf8
|
|
42
|
+
#else
|
|
43
|
+
#define UTF8VALIDATOR utf8_is_valid
|
|
44
|
+
#endif
|
|
45
|
+
|
|
46
|
+
static void zsv_check_row(void *ctx) {
|
|
47
|
+
struct zsv_check_data *data = ctx;
|
|
48
|
+
size_t column_count = zsv_cell_count(data->parser);
|
|
49
|
+
unsigned const char *row_start = NULL;
|
|
50
|
+
size_t row_len;
|
|
51
|
+
if (column_count != data->column_count) {
|
|
52
|
+
fprintf(data->out, "Row %zu column count (%zu) differs from header (%zu)", data->row_ix, column_count,
|
|
53
|
+
data->column_count);
|
|
54
|
+
data->err = 1;
|
|
55
|
+
if (data->display_row && column_count > 0) {
|
|
56
|
+
row_start = zsv_get_cell(data->parser, 0).str;
|
|
57
|
+
struct zsv_cell last_cell = zsv_get_cell(data->parser, column_count - 1);
|
|
58
|
+
row_len = (last_cell.str + last_cell.len - row_start);
|
|
59
|
+
fprintf(data->out, ": %.*s", (int)row_len, row_start);
|
|
60
|
+
}
|
|
61
|
+
fprintf(data->out, "\n");
|
|
62
|
+
}
|
|
63
|
+
if (data->check_utf8) {
|
|
64
|
+
if (!row_start) {
|
|
65
|
+
row_start = zsv_get_cell(data->parser, 0).str;
|
|
66
|
+
struct zsv_cell last_cell = zsv_get_cell(data->parser, column_count - 1);
|
|
67
|
+
row_len = (last_cell.str + last_cell.len - row_start);
|
|
68
|
+
}
|
|
69
|
+
if (row_len > 0 && !UTF8VALIDATOR(row_start, row_len)) {
|
|
70
|
+
data->err = 1;
|
|
71
|
+
fprintf(data->out, "Row %zu invalid utf8", data->row_ix);
|
|
72
|
+
if (data->display_row)
|
|
73
|
+
fprintf(data->out, ": %.*s", (int)row_len, row_start);
|
|
74
|
+
fprintf(data->out, "\n");
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
data->row_ix++;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
static void zsv_check_header(void *ctx) {
|
|
81
|
+
struct zsv_check_data *data = ctx;
|
|
82
|
+
data->column_count = zsv_cell_count(data->parser);
|
|
83
|
+
data->row_ix++;
|
|
84
|
+
zsv_set_row_handler(data->parser, zsv_check_row);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
static int zsv_check_usage(void) {
|
|
88
|
+
const char *zsv_check_usage_msg[] = {
|
|
89
|
+
USAGE_APPNAME ": check input for anomalies",
|
|
90
|
+
"",
|
|
91
|
+
"Usage: " APPNAME " <filename>",
|
|
92
|
+
"",
|
|
93
|
+
"Options:",
|
|
94
|
+
" -o,--output <path> : output to specified file path",
|
|
95
|
+
" --display-row : display the row contents with any reported issue",
|
|
96
|
+
// " --utf8 : check for invalid utf8",
|
|
97
|
+
// " --all : run all checks",
|
|
98
|
+
"",
|
|
99
|
+
"If no check options are provided, all checks are run",
|
|
100
|
+
NULL,
|
|
101
|
+
};
|
|
102
|
+
for (size_t i = 0; zsv_check_usage_msg[i]; i++)
|
|
103
|
+
fprintf(stdout, "%s\n", zsv_check_usage_msg[i]);
|
|
104
|
+
return 1;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
static void zsv_check_cleanup(struct zsv_check_data *data) {
|
|
108
|
+
if (data->in && data->in != stdin)
|
|
109
|
+
fclose(data->in);
|
|
110
|
+
if (data->out && data->out != stdout)
|
|
111
|
+
fclose(data->out);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *optsp,
|
|
115
|
+
struct zsv_prop_handler *custom_prop_handler) {
|
|
116
|
+
if (argc < 1 || (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")))) {
|
|
117
|
+
zsv_check_usage();
|
|
118
|
+
return 0;
|
|
119
|
+
}
|
|
120
|
+
struct zsv_opts opts = *optsp;
|
|
121
|
+
struct zsv_check_data data = {0};
|
|
122
|
+
int err = 0;
|
|
123
|
+
for (int arg_i = 1; !err && arg_i < argc; arg_i++) {
|
|
124
|
+
const char *arg = argv[arg_i];
|
|
125
|
+
if (!strcmp(arg, "--display-row"))
|
|
126
|
+
data.display_row = 1;
|
|
127
|
+
// else if (!strcmp(arg, "--utf8"))
|
|
128
|
+
// data.check_utf8 = 1;
|
|
129
|
+
else if (!strcmp(arg, "-o") || !strcmp(arg, "--output")) {
|
|
130
|
+
if (data.out)
|
|
131
|
+
err = zsv_printerr(1, "Output specified more than once");
|
|
132
|
+
else {
|
|
133
|
+
const char *fn = zsv_next_arg(++arg_i, argc, argv, &err);
|
|
134
|
+
if (!(fn && *fn))
|
|
135
|
+
err = zsv_printerr(1, "%s requires a filename value", arg);
|
|
136
|
+
else if (!(data.out = fopen(fn, "wb"))) {
|
|
137
|
+
err = errno;
|
|
138
|
+
perror(fn);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
} else if (!data.in) {
|
|
142
|
+
#ifndef NO_STDIN
|
|
143
|
+
if (!strcmp(arg, "-"))
|
|
144
|
+
data.in = stdin;
|
|
145
|
+
#endif
|
|
146
|
+
if (!data.in) {
|
|
147
|
+
if (!(data.in = fopen(arg, "rb"))) {
|
|
148
|
+
err = 1;
|
|
149
|
+
perror(arg);
|
|
150
|
+
} else
|
|
151
|
+
data.input_path = arg;
|
|
152
|
+
}
|
|
153
|
+
} else {
|
|
154
|
+
fprintf(stderr, "Unrecognized option: %s\n", arg);
|
|
155
|
+
err = 1;
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
if (!err && !data.in) {
|
|
160
|
+
#ifndef NO_STDIN
|
|
161
|
+
data.in = stdin;
|
|
162
|
+
#else
|
|
163
|
+
fprintf(stderr, "Please specify an input file\n");
|
|
164
|
+
err = 1;
|
|
165
|
+
#endif
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
if (err) {
|
|
169
|
+
zsv_check_cleanup(&data);
|
|
170
|
+
return 1;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
data.check_utf8 = 1;
|
|
174
|
+
if (!data.out)
|
|
175
|
+
data.out = stdout;
|
|
176
|
+
opts.row_handler = zsv_check_header;
|
|
177
|
+
opts.stream = data.in;
|
|
178
|
+
opts.ctx = &data;
|
|
179
|
+
if (zsv_new_with_properties(&opts, custom_prop_handler, data.input_path, &data.parser) != zsv_status_ok)
|
|
180
|
+
err = 1;
|
|
181
|
+
else {
|
|
182
|
+
// process the input data
|
|
183
|
+
zsv_handle_ctrl_c_signal();
|
|
184
|
+
enum zsv_status status;
|
|
185
|
+
while (!zsv_signal_interrupted && (status = zsv_parse_more(data.parser)) == zsv_status_ok)
|
|
186
|
+
;
|
|
187
|
+
zsv_finish(data.parser);
|
|
188
|
+
zsv_delete(data.parser);
|
|
189
|
+
}
|
|
190
|
+
zsv_check_cleanup(&data);
|
|
191
|
+
if (!err)
|
|
192
|
+
err = data.err;
|
|
193
|
+
return err;
|
|
194
|
+
}
|