zsv 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +36 -0
- data/LICENSE +21 -0
- data/README.md +311 -0
- data/ext/zsv/common.h +34 -0
- data/ext/zsv/extconf.rb +137 -0
- data/ext/zsv/options.c +126 -0
- data/ext/zsv/options.h +31 -0
- data/ext/zsv/options_internal.h +8 -0
- data/ext/zsv/parser.c +300 -0
- data/ext/zsv/parser.h +62 -0
- data/ext/zsv/row.c +122 -0
- data/ext/zsv/row.h +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
- data/ext/zsv/zsv_ext.c +343 -0
- data/lib/zsv/version.rb +5 -0
- data/lib/zsv.rb +81 -0
- metadata +340 -0
|
@@ -0,0 +1,812 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Liquidaty and zsv contributors. All rights reserved.
|
|
3
|
+
*
|
|
4
|
+
* This file is part of zsv/lib, distributed under the MIT license as defined at
|
|
5
|
+
* https://opensource.org/licenses/MIT
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <stdio.h>
|
|
9
|
+
#include <assert.h>
|
|
10
|
+
#ifdef _WIN32
|
|
11
|
+
#define _CRT_RAND_S // for random number generator, used when sampling. must come before including stdlib.h
|
|
12
|
+
#endif
|
|
13
|
+
#include <stdlib.h>
|
|
14
|
+
#include <stdint.h>
|
|
15
|
+
#include <string.h>
|
|
16
|
+
#include <ctype.h>
|
|
17
|
+
#include <time.h>
|
|
18
|
+
#include <stdarg.h>
|
|
19
|
+
|
|
20
|
+
#define ZSV_COMMAND select_pull
|
|
21
|
+
#include "zsv_command.h"
|
|
22
|
+
|
|
23
|
+
#include <zsv/utils/writer.h>
|
|
24
|
+
#include <zsv/utils/utf8.h>
|
|
25
|
+
#include <zsv/utils/string.h>
|
|
26
|
+
#include <zsv/utils/mem.h>
|
|
27
|
+
#include <zsv/utils/arg.h>
|
|
28
|
+
|
|
29
|
+
/* One node of the singly linked list of substrings used to filter rows. */
struct zsv_select_search_str {
  struct zsv_select_search_str *next; // following node, or NULL at the end of the list
  const char *value;                  // text to look for; the node only borrows this pointer
  size_t len;                         // strlen(value), cached when the node is created
};
|
|
34
|
+
|
|
35
|
+
static void zsv_select_search_str_delete(struct zsv_select_search_str *ss) {
|
|
36
|
+
for (struct zsv_select_search_str *next; ss; ss = next) {
|
|
37
|
+
next = ss->next;
|
|
38
|
+
free(ss);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/* Singly linked list of unsigned integers; used to hold extra input-column indexes for merged columns. */
struct zsv_select_uint_list {
  struct zsv_select_uint_list *next; // following node, or NULL at the end of the list
  unsigned int value;                // the stored index
};
|
|
46
|
+
|
|
47
|
+
struct zsv_select_data {
|
|
48
|
+
FILE *in;
|
|
49
|
+
unsigned int current_column_ix;
|
|
50
|
+
size_t data_row_count;
|
|
51
|
+
|
|
52
|
+
struct zsv_opts *opts;
|
|
53
|
+
unsigned int errcount;
|
|
54
|
+
|
|
55
|
+
unsigned int output_col_index; // num of cols printed in current row
|
|
56
|
+
|
|
57
|
+
// output columns:
|
|
58
|
+
const char **col_argv;
|
|
59
|
+
int col_argc;
|
|
60
|
+
char *cols_to_print; // better: bitfield
|
|
61
|
+
|
|
62
|
+
struct {
|
|
63
|
+
unsigned int ix; // index of the input column to be output
|
|
64
|
+
struct { // merge data: only used with --merge
|
|
65
|
+
struct zsv_select_uint_list *indexes, **last_index;
|
|
66
|
+
} merge;
|
|
67
|
+
} *out2in; // array of .output_cols_count length; out2in[x] = y where x = output ix, y = input info
|
|
68
|
+
|
|
69
|
+
unsigned int output_cols_count; // total count of output columns
|
|
70
|
+
|
|
71
|
+
#define MAX_EXCLUSIONS 1024
|
|
72
|
+
const unsigned char *exclusions[MAX_EXCLUSIONS];
|
|
73
|
+
unsigned int exclusion_count;
|
|
74
|
+
|
|
75
|
+
unsigned int header_name_count;
|
|
76
|
+
unsigned char **header_names;
|
|
77
|
+
|
|
78
|
+
const char *prepend_header; // --prepend-header
|
|
79
|
+
|
|
80
|
+
char header_finished;
|
|
81
|
+
|
|
82
|
+
char embedded_lineend;
|
|
83
|
+
|
|
84
|
+
double sample_pct;
|
|
85
|
+
|
|
86
|
+
unsigned char sample_every_n;
|
|
87
|
+
|
|
88
|
+
size_t data_rows_limit;
|
|
89
|
+
size_t skip_data_rows;
|
|
90
|
+
|
|
91
|
+
struct zsv_select_search_str *search_strings;
|
|
92
|
+
|
|
93
|
+
zsv_csv_writer csv_writer;
|
|
94
|
+
|
|
95
|
+
size_t overflow_size;
|
|
96
|
+
|
|
97
|
+
/*
|
|
98
|
+
struct {
|
|
99
|
+
size_t *offsets;
|
|
100
|
+
size_t count;
|
|
101
|
+
} fixed;
|
|
102
|
+
*/
|
|
103
|
+
unsigned char whitespace_clean_flags;
|
|
104
|
+
|
|
105
|
+
unsigned char print_all_cols : 1;
|
|
106
|
+
unsigned char use_header_indexes : 1;
|
|
107
|
+
unsigned char no_trim_whitespace : 1;
|
|
108
|
+
unsigned char cancelled : 1;
|
|
109
|
+
unsigned char skip_this_row : 1;
|
|
110
|
+
unsigned char verbose : 1;
|
|
111
|
+
unsigned char clean_white : 1;
|
|
112
|
+
unsigned char prepend_line_number : 1;
|
|
113
|
+
|
|
114
|
+
unsigned char any_clean : 1;
|
|
115
|
+
#define ZSV_SELECT_DISTINCT_MERGE 2
|
|
116
|
+
unsigned char distinct : 2; // 1 = ignore subsequent cols, ZSV_SELECT_DISTINCT_MERGE = merge subsequent cols (first
|
|
117
|
+
// non-null value)
|
|
118
|
+
|
|
119
|
+
unsigned char no_header : 1;
|
|
120
|
+
unsigned char _ : 4;
|
|
121
|
+
};
|
|
122
|
+
|
|
123
|
+
/* Shape of a column-index argument, as classified by zsv_select_column_index_selection(). */
enum zsv_select_column_index_selection_type {
  zsv_select_column_index_selection_type_none = 0,         // not a valid index spec
  zsv_select_column_index_selection_type_single = 1,       // "N"
  zsv_select_column_index_selection_type_range = 2,        // "N-M"
  zsv_select_column_index_selection_type_lower_bounded = 3 // "N-"
};
|
|
129
|
+
|
|
130
|
+
/* Forward declaration: the exclusion check below needs this before its definition appears. */
static enum zsv_select_column_index_selection_type
zsv_select_column_index_selection(const unsigned char *arg, unsigned *lo, unsigned *hi);
|
|
132
|
+
|
|
133
|
+
static inline void zsv_select_add_exclusion(struct zsv_select_data *data, const char *name) {
|
|
134
|
+
if (data->exclusion_count < MAX_EXCLUSIONS)
|
|
135
|
+
data->exclusions[data->exclusion_count++] = (const unsigned char *)name;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
static inline unsigned char *zsv_select_get_header_name(struct zsv_select_data *data, unsigned in_ix) {
|
|
139
|
+
if (in_ix < data->header_name_count)
|
|
140
|
+
return data->header_names[in_ix];
|
|
141
|
+
return NULL;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
static inline char zsv_select_excluded_current_header_name(struct zsv_select_data *data, unsigned in_ix) {
|
|
145
|
+
if (data->exclusion_count) {
|
|
146
|
+
unsigned char *header_name = zsv_select_get_header_name(data, in_ix);
|
|
147
|
+
if (data->use_header_indexes) {
|
|
148
|
+
for (unsigned int ix = 0; ix < data->exclusion_count; ix++) {
|
|
149
|
+
unsigned i, j;
|
|
150
|
+
switch (zsv_select_column_index_selection(data->exclusions[ix], &i, &j)) {
|
|
151
|
+
case zsv_select_column_index_selection_type_none:
|
|
152
|
+
// not expected!
|
|
153
|
+
break;
|
|
154
|
+
case zsv_select_column_index_selection_type_single:
|
|
155
|
+
if (in_ix + 1 == i)
|
|
156
|
+
return 1;
|
|
157
|
+
break;
|
|
158
|
+
case zsv_select_column_index_selection_type_range:
|
|
159
|
+
if (i <= in_ix + 1 && in_ix + 1 <= j)
|
|
160
|
+
return 1;
|
|
161
|
+
break;
|
|
162
|
+
case zsv_select_column_index_selection_type_lower_bounded:
|
|
163
|
+
if (i <= in_ix + 1)
|
|
164
|
+
return 1;
|
|
165
|
+
break;
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
} else {
|
|
169
|
+
if (header_name) {
|
|
170
|
+
for (unsigned int i = 0; i < data->exclusion_count; i++)
|
|
171
|
+
if (!zsv_stricmp(header_name, data->exclusions[i]))
|
|
172
|
+
return 1;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
return 0;
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// zsv_select_find_header(): return 1-based index, or 0 if not found
|
|
180
|
+
static int zsv_select_find_header(struct zsv_select_data *data, const unsigned char *header_name) {
|
|
181
|
+
if (header_name) {
|
|
182
|
+
for (unsigned int i = 0; i < data->output_cols_count; i++) {
|
|
183
|
+
unsigned char *prior_header_name = zsv_select_get_header_name(data, data->out2in[i].ix);
|
|
184
|
+
if (prior_header_name && !zsv_stricmp(header_name, prior_header_name))
|
|
185
|
+
return i + 1;
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
return 0;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
static int zsv_select_add_output_col(struct zsv_select_data *data, unsigned in_ix) {
|
|
192
|
+
int err = 0;
|
|
193
|
+
if (data->output_cols_count < data->opts->max_columns) {
|
|
194
|
+
int found = zsv_select_find_header(data, zsv_select_get_header_name(data, in_ix));
|
|
195
|
+
if (data->distinct && found) {
|
|
196
|
+
if (data->distinct == ZSV_SELECT_DISTINCT_MERGE) {
|
|
197
|
+
// add this index
|
|
198
|
+
struct zsv_select_uint_list *ix = calloc(1, sizeof(*ix));
|
|
199
|
+
if (!ix)
|
|
200
|
+
err = zsv_printerr(1, "Out of memory!\n");
|
|
201
|
+
else {
|
|
202
|
+
ix->value = in_ix;
|
|
203
|
+
if (!data->out2in[found - 1].merge.indexes)
|
|
204
|
+
data->out2in[found - 1].merge.indexes = ix;
|
|
205
|
+
else
|
|
206
|
+
*data->out2in[found - 1].merge.last_index = ix;
|
|
207
|
+
data->out2in[found - 1].merge.last_index = &ix->next;
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
return err;
|
|
211
|
+
}
|
|
212
|
+
if (zsv_select_excluded_current_header_name(data, in_ix))
|
|
213
|
+
return err;
|
|
214
|
+
data->out2in[data->output_cols_count++].ix = in_ix;
|
|
215
|
+
}
|
|
216
|
+
return err;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
/*
 * Case-insensitive linear search of `needle` among the first `hay_count`
 * entries of `haystack`. NULL and "" are both treated as blank: a blank
 * needle matches the first blank entry, and blank never matches non-blank.
 *
 * Returns the 1-based position of the first match, or 0 if there is none.
 * Not very fast, but it does not need to be.
 */
static inline unsigned int str_array_ifind(const unsigned char *needle, unsigned char *haystack[], unsigned hay_count) {
  const char needle_blank = !(needle && *needle);
  for (unsigned int pos = 0; pos < hay_count; pos++) {
    const unsigned char *candidate = haystack[pos];
    const char candidate_blank = !(candidate && *candidate);
    if (needle_blank || candidate_blank) {
      if (needle_blank && candidate_blank)
        return pos + 1;
      continue;
    }
    if (zsv_stricmp(needle, candidate) == 0)
      return pos + 1;
  }
  return 0;
}
|
|
231
|
+
|
|
232
|
+
static int zsv_select_set_output_columns(struct zsv_select_data *data) {
|
|
233
|
+
int err = 0;
|
|
234
|
+
unsigned int header_name_count = data->header_name_count;
|
|
235
|
+
if (!data->col_argc) {
|
|
236
|
+
for (unsigned int i = 0; !err && i < header_name_count; i++)
|
|
237
|
+
err = zsv_select_add_output_col(data, i);
|
|
238
|
+
} else if (data->use_header_indexes) {
|
|
239
|
+
for (int arg_i = 0; !err && arg_i < data->col_argc; arg_i++) {
|
|
240
|
+
const char *arg = data->col_argv[arg_i];
|
|
241
|
+
unsigned i, j;
|
|
242
|
+
switch (zsv_select_column_index_selection((const unsigned char *)arg, &i, &j)) {
|
|
243
|
+
case zsv_select_column_index_selection_type_none:
|
|
244
|
+
zsv_printerr(1, "Invalid column index: %s", arg);
|
|
245
|
+
err = -1;
|
|
246
|
+
break;
|
|
247
|
+
case zsv_select_column_index_selection_type_single:
|
|
248
|
+
err = zsv_select_add_output_col(data, i - 1);
|
|
249
|
+
break;
|
|
250
|
+
case zsv_select_column_index_selection_type_range:
|
|
251
|
+
while (i <= j && i < data->opts->max_columns) {
|
|
252
|
+
err = zsv_select_add_output_col(data, i - 1);
|
|
253
|
+
i++;
|
|
254
|
+
}
|
|
255
|
+
break;
|
|
256
|
+
case zsv_select_column_index_selection_type_lower_bounded:
|
|
257
|
+
if (i) {
|
|
258
|
+
for (unsigned int k = i - 1; !err && k < header_name_count; k++)
|
|
259
|
+
err = zsv_select_add_output_col(data, k);
|
|
260
|
+
}
|
|
261
|
+
break;
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
} else { // using header names
|
|
265
|
+
for (int arg_i = 0; !err && arg_i < data->col_argc; arg_i++) {
|
|
266
|
+
// find the location of the matching header name, if any
|
|
267
|
+
unsigned int in_pos =
|
|
268
|
+
str_array_ifind((const unsigned char *)data->col_argv[arg_i], data->header_names, header_name_count);
|
|
269
|
+
if (!in_pos) {
|
|
270
|
+
fprintf(stderr, "Column %s not found\n", data->col_argv[arg_i]);
|
|
271
|
+
err = -1;
|
|
272
|
+
} else
|
|
273
|
+
err = zsv_select_add_output_col(data, in_pos - 1);
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
return err;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
static void zsv_select_add_search(struct zsv_select_data *data, const char *value) {
|
|
280
|
+
struct zsv_select_search_str *ss = calloc(1, sizeof(*ss));
|
|
281
|
+
ss->value = value;
|
|
282
|
+
ss->len = value ? strlen(value) : 0;
|
|
283
|
+
ss->next = data->search_strings;
|
|
284
|
+
data->search_strings = ss;
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
#ifndef NDEBUG
|
|
288
|
+
__attribute__((always_inline)) static inline
|
|
289
|
+
#endif
|
|
290
|
+
unsigned char *
|
|
291
|
+
zsv_select_cell_clean(struct zsv_select_data *data, unsigned char *utf8_value, char quoted, size_t *lenp) {
|
|
292
|
+
size_t len = *lenp;
|
|
293
|
+
// to do: option to replace or warn non-printable chars 0 - 31:
|
|
294
|
+
// vectorized scan
|
|
295
|
+
// replace or warn if found
|
|
296
|
+
|
|
297
|
+
if (UNLIKELY(!data->no_trim_whitespace))
|
|
298
|
+
utf8_value = (unsigned char *)zsv_strtrim(utf8_value, &len);
|
|
299
|
+
|
|
300
|
+
if (UNLIKELY(data->clean_white))
|
|
301
|
+
len = zsv_strwhite(utf8_value, len, data->whitespace_clean_flags); // to do: zsv_clean
|
|
302
|
+
|
|
303
|
+
if (UNLIKELY(data->embedded_lineend && quoted)) {
|
|
304
|
+
unsigned char *tmp;
|
|
305
|
+
const char *to_replace[] = {"\r\n", "\r", "\n"};
|
|
306
|
+
for (int i = 0; i < 3; i++) {
|
|
307
|
+
while ((tmp = memmem(utf8_value, len, to_replace[i], strlen(to_replace[i])))) {
|
|
308
|
+
if (strlen(to_replace[i]) == 1)
|
|
309
|
+
*tmp = data->embedded_lineend;
|
|
310
|
+
else {
|
|
311
|
+
size_t right_len = utf8_value + len - tmp;
|
|
312
|
+
memmove(tmp + 1, tmp + 2, right_len - 2);
|
|
313
|
+
*tmp = data->embedded_lineend;
|
|
314
|
+
len--;
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
if (data->no_trim_whitespace)
|
|
319
|
+
utf8_value = (unsigned char *)zsv_strtrim(utf8_value, &len);
|
|
320
|
+
}
|
|
321
|
+
*lenp = len;
|
|
322
|
+
return utf8_value;
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
/*
 * Decide whether the parser's current row passes the search filter.
 *
 * Returns 1 when no search strings are configured, or when some non-empty
 * cell contains one of the non-empty search strings (byte-wise match);
 * returns 0 otherwise. When any_clean is set, each cell is first passed
 * through zsv_select_cell_clean(), which modifies it in place.
 */
static inline char zsv_select_row_search_hit(struct zsv_select_data *data, zsv_parser p) {
  if (!data->search_strings)
    return 1;

  const unsigned int cell_total = zsv_cell_count(p);
  for (unsigned int c = 0; c < cell_total; c++) {
    struct zsv_cell cell = zsv_get_cell(p, c);
    if (UNLIKELY(data->any_clean != 0))
      cell.str = zsv_select_cell_clean(data, cell.str, cell.quoted, &cell.len);
    if (!cell.len)
      continue;

    struct zsv_select_search_str *ss = data->search_strings;
    while (ss) {
      if (ss->value && *ss->value && memmem(cell.str, cell.len, ss->value, ss->len))
        return 1;
      ss = ss->next;
    }
  }
  return 0;
}
|
|
342
|
+
|
|
343
|
+
static enum zsv_select_column_index_selection_type zsv_select_column_index_selection(const unsigned char *arg,
|
|
344
|
+
unsigned *lo, unsigned *hi) {
|
|
345
|
+
enum zsv_select_column_index_selection_type result = zsv_select_column_index_selection_type_none;
|
|
346
|
+
|
|
347
|
+
unsigned int i = 0;
|
|
348
|
+
unsigned int j = 0;
|
|
349
|
+
int n = 0;
|
|
350
|
+
int k = sscanf((const char *)arg, "%u-%u%n", &i, &j, &n);
|
|
351
|
+
if (k == 2) {
|
|
352
|
+
if (n >= 0 && (size_t)n == strlen((const char *)arg) && i > 0 && j >= i)
|
|
353
|
+
result = zsv_select_column_index_selection_type_range;
|
|
354
|
+
} else {
|
|
355
|
+
k = sscanf((const char *)arg, "%u%n", &i, &n);
|
|
356
|
+
if (k == 1 && n >= 0 && (size_t)n == strlen((const char *)arg)) {
|
|
357
|
+
if (i > 0)
|
|
358
|
+
result = zsv_select_column_index_selection_type_single;
|
|
359
|
+
} else {
|
|
360
|
+
k = sscanf((const char *)arg, "%u-%n", &i, &n);
|
|
361
|
+
if (k == 1 && n >= 0 && (size_t)n == strlen((const char *)arg)) {
|
|
362
|
+
if (i > 0) {
|
|
363
|
+
result = zsv_select_column_index_selection_type_lower_bounded;
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
if (lo)
|
|
369
|
+
*lo = i;
|
|
370
|
+
if (hi)
|
|
371
|
+
*hi = j;
|
|
372
|
+
return result;
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
// zsv_select_check_exclusions_are_indexes(): return err
|
|
376
|
+
static int zsv_select_check_exclusions_are_indexes(struct zsv_select_data *data) {
|
|
377
|
+
int err = 0;
|
|
378
|
+
for (unsigned int e = 0; e < data->exclusion_count; e++) {
|
|
379
|
+
const unsigned char *arg = data->exclusions[e];
|
|
380
|
+
if (zsv_select_column_index_selection(arg, NULL, NULL) == zsv_select_column_index_selection_type_none)
|
|
381
|
+
err = zsv_printerr(1, "Invalid column index: %s", arg);
|
|
382
|
+
}
|
|
383
|
+
return err;
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
/*
 * demo_random_bw_1_and_100(): this is a poor random number generator. you probably
 * will want to use a better one
 *
 * Despite the name, the result lies in [0, 100).
 *
 * Source, by availability: arc4random_uniform(), else rand_s(), else rand().
 * Each raw value is scaled by the range of the generator that produced it:
 * rand() yields at most RAND_MAX, so dividing it by UINT_MAX + 1 would shrink
 * the result range and inflate the sampling rate.
 */
static double demo_random_bw_1_and_100(void) {
#ifdef HAVE_ARC4RANDOM_UNIFORM
  return (long double)(arc4random_uniform(1000000)) / 10000;
#else
  const double max = 100.0;
#ifdef HAVE_RAND_S
  // rand_s() can fail; try up to 11 times, then fall through to rand()
  // rather than using a value that was never written
  unsigned int n = 0;
  for (unsigned int tries = 0; tries <= 10; tries++) {
    if (rand_s(&n) == 0)
      return (double)n / ((double)UINT_MAX + 1) * max;
  }
#endif
  return (double)rand() / ((double)RAND_MAX + 1) * max;
#endif
}
|
|
406
|
+
|
|
407
|
+
// zsv_select_output_row(): output row data
|
|
408
|
+
static void zsv_select_output_data_row(struct zsv_select_data *data, zsv_parser p) {
|
|
409
|
+
unsigned int cnt = data->output_cols_count;
|
|
410
|
+
char first = 1;
|
|
411
|
+
if (data->prepend_line_number) {
|
|
412
|
+
zsv_writer_cell_zu(data->csv_writer, first, data->data_row_count);
|
|
413
|
+
first = 0;
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
/* print data row */
|
|
417
|
+
for (unsigned int i = 0; i < cnt; i++) { // for each output column
|
|
418
|
+
unsigned int in_ix = data->out2in[i].ix;
|
|
419
|
+
struct zsv_cell cell = zsv_get_cell(p, in_ix);
|
|
420
|
+
if (UNLIKELY(data->any_clean != 0))
|
|
421
|
+
cell.str = zsv_select_cell_clean(data, cell.str, cell.quoted, &cell.len);
|
|
422
|
+
if (VERY_UNLIKELY(data->distinct == ZSV_SELECT_DISTINCT_MERGE)) {
|
|
423
|
+
if (UNLIKELY(cell.len == 0)) {
|
|
424
|
+
for (struct zsv_select_uint_list *ix = data->out2in[i].merge.indexes; ix; ix = ix->next) {
|
|
425
|
+
unsigned int m_ix = ix->value;
|
|
426
|
+
cell = zsv_get_cell(p, m_ix);
|
|
427
|
+
if (cell.len) {
|
|
428
|
+
if (UNLIKELY(data->any_clean != 0))
|
|
429
|
+
cell.str = zsv_select_cell_clean(data, cell.str, cell.quoted, &cell.len);
|
|
430
|
+
if (cell.len)
|
|
431
|
+
break;
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
zsv_writer_cell(data->csv_writer, first, cell.str, cell.len, cell.quoted);
|
|
437
|
+
first = 0;
|
|
438
|
+
}
|
|
439
|
+
}
|
|
440
|
+
|
|
441
|
+
/**
 * zsv_select_data_row(): process one data row.
 *
 * Increments the data row count, then decides whether the row is output:
 *  - rows with no cells, and all rows once processing is cancelled, are ignored
 *  - while skip_data_rows is non-zero, the row is skipped and the counter decremented
 *  - when sampling is active, the row is kept only if it is selected by the
 *    every-nth rule and/or by the random percentage rule
 *  - rows that survive the above must also satisfy zsv_select_row_search_hit()
 * After a row is output, processing is cancelled if the row limit has been reached.
 *
 * @param data select state
 * @param p    parser positioned on the row to process
 */
static void zsv_select_data_row(struct zsv_select_data *data, zsv_parser p) {
  data->data_row_count++;

  if (UNLIKELY(zsv_cell_count(p) == 0 || data->cancelled))
    return;

  // check if we should skip this row
  data->skip_this_row = 0;
  if (UNLIKELY(data->skip_data_rows)) {
    data->skip_data_rows--;
    data->skip_this_row = 1;
  } else if (UNLIKELY(data->sample_every_n || data->sample_pct)) {
    data->skip_this_row = 1;
    // keep the first row, then every nth row. n == 1 is handled explicitly
    // because (count % 1) is always 0 and would otherwise never select a row
    if (data->sample_every_n &&
        (data->sample_every_n == 1 || data->data_row_count % data->sample_every_n == 1))
      data->skip_this_row = 0;
    if (data->sample_pct && demo_random_bw_1_and_100() <= data->sample_pct)
      data->skip_this_row = 0;
  }

  if (LIKELY(!data->skip_this_row)) {
    // if we have a search filter, check that
    if (zsv_select_row_search_hit(data, p)) {
      // print the data row
      zsv_select_output_data_row(data, p);
      if (UNLIKELY(data->data_rows_limit > 0))
        if (data->data_row_count + 1 >= data->data_rows_limit)
          data->cancelled = 1;
    }
  }
  if (data->data_row_count % 25000 == 0 && data->verbose)
    fprintf(stderr, "Processed %zu rows\n", data->data_row_count);
}
|
|
476
|
+
|
|
477
|
+
static void zsv_select_print_header_row(struct zsv_select_data *data) {
|
|
478
|
+
if (data->no_header)
|
|
479
|
+
return;
|
|
480
|
+
zsv_writer_cell_prepend(data->csv_writer, (const unsigned char *)data->prepend_header);
|
|
481
|
+
if (data->prepend_line_number)
|
|
482
|
+
zsv_writer_cell_s(data->csv_writer, 1, (const unsigned char *)"#", 0);
|
|
483
|
+
for (unsigned int i = 0; i < data->output_cols_count; i++) {
|
|
484
|
+
unsigned char *header_name = zsv_select_get_header_name(data, data->out2in[i].ix);
|
|
485
|
+
zsv_writer_cell_s(data->csv_writer, i == 0 && !data->prepend_line_number, header_name, 1);
|
|
486
|
+
}
|
|
487
|
+
zsv_writer_cell_prepend(data->csv_writer, NULL);
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
static void zsv_select_header_finish(struct zsv_select_data *data) {
|
|
491
|
+
if (zsv_select_set_output_columns(data))
|
|
492
|
+
data->cancelled = 1;
|
|
493
|
+
else
|
|
494
|
+
zsv_select_print_header_row(data);
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
/**
 * zsv_select_header_row(): capture the header names from the current parser
 * row, then finish header processing (resolve output columns, print header).
 *
 * Every cell of the row is fetched (and cleaned, when cleaning is enabled),
 * but only cells whose index is below opts->max_columns are stored.
 * Does nothing if processing has already been cancelled.
 *
 * @param data select state that receives the header names
 * @param p    parser positioned on the header row
 */
static void zsv_select_header_row(struct zsv_select_data *data, zsv_parser p) {
  if (data->cancelled)
    return;

  const unsigned int col_count = zsv_cell_count(p);
  unsigned int stored_count = 0; // one past the highest header index stored below

  for (unsigned int col = 0; col < col_count; col++) {
    struct zsv_cell hdr = zsv_get_cell(p, col);
    if (UNLIKELY(data->any_clean != 0))
      hdr.str = zsv_select_cell_clean(data, hdr.str, hdr.quoted, &hdr.len);
    if (col >= data->opts->max_columns)
      continue; // beyond the configured column limit: not stored
    data->header_names[col] = zsv_memdup(hdr.str, hdr.len);
    stored_count = col + 1;
  }

  // in case we want to make this an option later
  const char trim_trailing_columns = 1;
  const unsigned int header_count = trim_trailing_columns ? stored_count : col_count;

  if (data->header_name_count < header_count)
    data->header_name_count = header_count;

  zsv_select_header_finish(data);
}
|
|
523
|
+
|
|
524
|
+
#define ZSV_SELECT_MAX_COLS_DEFAULT 1024
|
|
525
|
+
#define ZSV_SELECT_MAX_COLS_DEFAULT_S "1024"
|
|
526
|
+
|
|
527
|
+
const char *zsv_select_usage_msg[] = {
|
|
528
|
+
APPNAME ": extracts and outputs specified columns",
|
|
529
|
+
"",
|
|
530
|
+
"Usage: " APPNAME " [filename] [options] [-- col_specifier [... col_specifier]]",
|
|
531
|
+
" where col_specifier is a column name or, if the -n option is used,",
|
|
532
|
+
" a column index (starting at 1) or index range in the form of n-m",
|
|
533
|
+
" e.g. " APPNAME " -n file.csv -- 1 4-6 50 10",
|
|
534
|
+
" " APPNAME " file.csv -- first_col fiftieth_column \"Tenth Column\"",
|
|
535
|
+
"",
|
|
536
|
+
"Note: Outputs the columns specified after '--' separator, or all columns if omitted.",
|
|
537
|
+
"",
|
|
538
|
+
"Options:",
|
|
539
|
+
" -b,--with-bom : output with BOM",
|
|
540
|
+
// " --fixed <offset1,offset2,offset3>: parse as fixed-width text; use given comma-separated list of positive integers
|
|
541
|
+
// for cell end indexes",
|
|
542
|
+
#ifndef ZSV_CLI
|
|
543
|
+
" -v, --verbose : verbose output",
|
|
544
|
+
#endif
|
|
545
|
+
" -H,--head <n> : (head) only process the first n rows of input data (including header)",
|
|
546
|
+
" --no-header : do not output header row",
|
|
547
|
+
" --prepend-header <value> : prepend each column header with the given text <value>",
|
|
548
|
+
" -s, --search <value> : only output rows with at least one cell containing <value>",
|
|
549
|
+
// TO DO: " -s, --search /<pattern>/modifiers: search on regex pattern; modifiers include 'g' (global) and 'i'
|
|
550
|
+
// (case-insensitive)",
|
|
551
|
+
" --sample-every <num_of_rows> : output a sample consisting of the first row, then every nth row",
|
|
552
|
+
" --sample-pct <percentage> : output a randomly-selected sample (32 bits of randomness) of n%% of input rows",
|
|
553
|
+
" -d,--header-row-span <n> : apply header depth (rowspan) of n",
|
|
554
|
+
" --distinct : skip subsequent occurrences of columns with the same name",
|
|
555
|
+
" --merge : merge subsequent occurrences of columns with the same name",
|
|
556
|
+
" outputting first non-null value",
|
|
557
|
+
// --rename: like distinct, but instead of removing cols with dupe names, renames them, trying _<n> for n up to max
|
|
558
|
+
// cols
|
|
559
|
+
" -e <embedded_lineend_char> : char to replace embedded lineend. If left empty, embedded lineends are preserved.",
|
|
560
|
+
" If the provided string begins with 0x, it will be interpreted as the hex",
|
|
561
|
+
" representation of a string.",
|
|
562
|
+
" -x <column> : exclude the indicated column. can be specified more than once",
|
|
563
|
+
" -N,--line-number : prefix each row with the row number",
|
|
564
|
+
" -n : provided column indexes are numbers corresponding to column positions",
|
|
565
|
+
" (starting with 1), instead of names",
|
|
566
|
+
#ifndef ZSV_CLI
|
|
567
|
+
" -T : input is tab-delimited, instead of comma-delimited",
|
|
568
|
+
" -O,--other-delim <delim> : input is delimited with the given char",
|
|
569
|
+
" Note: This option does not support quoted values with embedded delimiters.",
|
|
570
|
+
#endif
|
|
571
|
+
" -w,--whitespace-clean : normalize all whitespace to space or newline, single-char (non-consecutive)",
|
|
572
|
+
" occurrences",
|
|
573
|
+
" --whitespace-clean-no-newline: clean whitespace and remove embedded newlines",
|
|
574
|
+
" -W,--no-trim : do not trim whitespace",
|
|
575
|
+
#ifndef ZSV_CLI
|
|
576
|
+
" -C <max_num_of_columns> : defaults to " ZSV_SELECT_MAX_COLS_DEFAULT_S,
|
|
577
|
+
" -L,--max-row-size <n> : set the maximum memory used for a single row",
|
|
578
|
+
" Default: " ZSV_ROW_MAX_SIZE_MIN_S " (min), " ZSV_ROW_MAX_SIZE_DEFAULT_S " (max)",
|
|
579
|
+
#endif
|
|
580
|
+
" -o <filename> : filename to save output to",
|
|
581
|
+
NULL,
|
|
582
|
+
};
|
|
583
|
+
|
|
584
|
+
static void zsv_select_usage(void) {
|
|
585
|
+
for (size_t i = 0; zsv_select_usage_msg[i]; i++)
|
|
586
|
+
fprintf(stdout, "%s\n", zsv_select_usage_msg[i]);
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
static void zsv_select_cleanup(struct zsv_select_data *data) {
|
|
590
|
+
if (data->opts->stream && data->opts->stream != stdin)
|
|
591
|
+
fclose(data->opts->stream);
|
|
592
|
+
|
|
593
|
+
zsv_writer_delete(data->csv_writer);
|
|
594
|
+
zsv_select_search_str_delete(data->search_strings);
|
|
595
|
+
|
|
596
|
+
if (data->distinct == ZSV_SELECT_DISTINCT_MERGE) {
|
|
597
|
+
for (unsigned int i = 0; i < data->output_cols_count; i++) {
|
|
598
|
+
for (struct zsv_select_uint_list *next, *ix = data->out2in[i].merge.indexes; ix; ix = next) {
|
|
599
|
+
next = ix->next;
|
|
600
|
+
free(ix);
|
|
601
|
+
}
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
free(data->out2in);
|
|
605
|
+
|
|
606
|
+
for (unsigned int i = 0; i < data->header_name_count; i++)
|
|
607
|
+
free(data->header_names[i]);
|
|
608
|
+
free(data->header_names);
|
|
609
|
+
|
|
610
|
+
// free(data->fixed.offsets);
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts,
|
|
614
|
+
struct zsv_prop_handler *custom_prop_handler) {
|
|
615
|
+
if (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
|
|
616
|
+
zsv_select_usage();
|
|
617
|
+
return zsv_status_ok;
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
int err = 0;
|
|
621
|
+
struct zsv_select_data data = {0};
|
|
622
|
+
data.opts = opts;
|
|
623
|
+
const char *input_path = NULL;
|
|
624
|
+
struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
|
|
625
|
+
int col_index_arg_i = 0;
|
|
626
|
+
enum zsv_status stat = zsv_status_ok;
|
|
627
|
+
for (int arg_i = 1; stat == zsv_status_ok && arg_i < argc; arg_i++) {
|
|
628
|
+
if (!strcmp(argv[arg_i], "--")) {
|
|
629
|
+
col_index_arg_i = arg_i + 1;
|
|
630
|
+
break;
|
|
631
|
+
}
|
|
632
|
+
if (!strcmp(argv[arg_i], "-b") || !strcmp(argv[arg_i], "--with-bom"))
|
|
633
|
+
writer_opts.with_bom = 1;
|
|
634
|
+
/*
|
|
635
|
+
else if(!strcmp(argv[arg_i], "--fixed")) {
|
|
636
|
+
if(++arg_i >= argc)
|
|
637
|
+
stat = zsv_printerr(1, "%s option requires parameter", argv[arg_i-1]);
|
|
638
|
+
else { // parse offsets
|
|
639
|
+
data.fixed.count = 1;
|
|
640
|
+
for(const char *s = argv[arg_i]; *s; s++)
|
|
641
|
+
if(*s == ',')
|
|
642
|
+
data.fixed.count++;
|
|
643
|
+
free(data.fixed.offsets);
|
|
644
|
+
data.fixed.offsets = malloc(data.fixed.count * sizeof(*data.fixed.offsets));
|
|
645
|
+
size_t count = 0;
|
|
646
|
+
const char *start = argv[arg_i];
|
|
647
|
+
for(const char *end = argv[arg_i]; ; end++) {
|
|
648
|
+
if(*end == ',' || *end == '\0') {
|
|
649
|
+
if(!sscanf(start, "%zu,", &data.fixed.offsets[count++])) {
|
|
650
|
+
stat = zsv_printerr(1, "Invalid offset: %s.*\n", end - start, start);
|
|
651
|
+
break;
|
|
652
|
+
} else if(*end == '\0')
|
|
653
|
+
break;
|
|
654
|
+
else {
|
|
655
|
+
start = end + 1;
|
|
656
|
+
if(*start == '\0')
|
|
657
|
+
break;
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
}
|
|
662
|
+
} */
|
|
663
|
+
else if (!strcmp(argv[arg_i], "--distinct"))
|
|
664
|
+
data.distinct = 1;
|
|
665
|
+
else if (!strcmp(argv[arg_i], "--merge"))
|
|
666
|
+
data.distinct = ZSV_SELECT_DISTINCT_MERGE;
|
|
667
|
+
else if (!strcmp(argv[arg_i], "-o") || !strcmp(argv[arg_i], "--output")) {
|
|
668
|
+
if (++arg_i >= argc)
|
|
669
|
+
stat = zsv_printerr(1, "%s option requires parameter", argv[arg_i - 1]);
|
|
670
|
+
else if (writer_opts.stream && writer_opts.stream != stdout)
|
|
671
|
+
stat = zsv_printerr(1, "Output file specified more than once");
|
|
672
|
+
else if (!(writer_opts.stream = fopen(argv[arg_i], "wb")))
|
|
673
|
+
stat = zsv_printerr(1, "Unable to open for writing: %s", argv[arg_i]);
|
|
674
|
+
else if (data.opts->verbose)
|
|
675
|
+
fprintf(stderr, "Opened %s for write\n", argv[arg_i]);
|
|
676
|
+
} else if (!strcmp(argv[arg_i], "-N") || !strcmp(argv[arg_i], "--line-number")) {
|
|
677
|
+
data.prepend_line_number = 1;
|
|
678
|
+
} else if (!strcmp(argv[arg_i], "-n"))
|
|
679
|
+
data.use_header_indexes = 1;
|
|
680
|
+
else if (!strcmp(argv[arg_i], "-s") || !strcmp(argv[arg_i], "--search")) {
|
|
681
|
+
arg_i++;
|
|
682
|
+
if (arg_i < argc && strlen(argv[arg_i]))
|
|
683
|
+
zsv_select_add_search(&data, argv[arg_i]);
|
|
684
|
+
else
|
|
685
|
+
stat = zsv_printerr(1, "%s option requires a value", argv[arg_i - 1]);
|
|
686
|
+
} else if (!strcmp(argv[arg_i], "-v") || !strcmp(argv[arg_i], "--verbose")) {
|
|
687
|
+
data.verbose = 1;
|
|
688
|
+
} else if (!strcmp(argv[arg_i], "-w") || !strcmp(argv[arg_i], "--whitespace-clean"))
|
|
689
|
+
data.clean_white = 1;
|
|
690
|
+
else if (!strcmp(argv[arg_i], "--whitespace-clean-no-newline")) {
|
|
691
|
+
data.clean_white = 1;
|
|
692
|
+
data.whitespace_clean_flags = 1;
|
|
693
|
+
} else if (!strcmp(argv[arg_i], "-W") || !strcmp(argv[arg_i], "--no-trim")) {
|
|
694
|
+
data.no_trim_whitespace = 1;
|
|
695
|
+
} else if (!strcmp(argv[arg_i], "--sample-every")) {
|
|
696
|
+
arg_i++;
|
|
697
|
+
if (!(arg_i < argc))
|
|
698
|
+
stat = zsv_printerr(1, "--sample-every option requires a value");
|
|
699
|
+
else if (atoi(argv[arg_i]) <= 0)
|
|
700
|
+
stat = zsv_printerr(1, "--sample-every value should be an integer > 0");
|
|
701
|
+
else
|
|
702
|
+
data.sample_every_n = atoi(argv[arg_i]);
|
|
703
|
+
} else if (!strcmp(argv[arg_i], "--sample-pct")) {
|
|
704
|
+
arg_i++;
|
|
705
|
+
double d;
|
|
706
|
+
if (!(arg_i < argc))
|
|
707
|
+
stat = zsv_printerr(1, "--sample-pct option requires a value");
|
|
708
|
+
else if (!(d = atof(argv[arg_i])) && d > 0 && d < 100)
|
|
709
|
+
stat = zsv_printerr(
|
|
710
|
+
-1, "--sample-pct value should be a number between 0 and 100 (e.g. 1.5 for a sample of 1.5%% of the data");
|
|
711
|
+
else
|
|
712
|
+
data.sample_pct = d;
|
|
713
|
+
} else if (!strcmp(argv[arg_i], "--prepend-header"))
|
|
714
|
+
data.prepend_header = zsv_next_arg(++arg_i, argc, argv, &err);
|
|
715
|
+
else if (!strcmp(argv[arg_i], "--no-header"))
|
|
716
|
+
data.no_header = 1;
|
|
717
|
+
else if (!strcmp(argv[arg_i], "-H") || !strcmp(argv[arg_i], "--head")) {
|
|
718
|
+
if (!(arg_i + 1 < argc && atoi(argv[arg_i + 1]) >= 0))
|
|
719
|
+
stat = zsv_printerr(1, "%s option value invalid: should be positive integer; got %s", argv[arg_i],
|
|
720
|
+
arg_i + 1 < argc ? argv[arg_i + 1] : "");
|
|
721
|
+
else
|
|
722
|
+
data.data_rows_limit = atoi(argv[++arg_i]) + 1;
|
|
723
|
+
} else if (!strcmp(argv[arg_i], "-D") || !strcmp(argv[arg_i], "--skip-data")) {
|
|
724
|
+
++arg_i;
|
|
725
|
+
if (!(arg_i < argc && atoi(argv[arg_i]) >= 0))
|
|
726
|
+
stat = zsv_printerr(1, "%s option value invalid: should be positive integer", argv[arg_i - 1]);
|
|
727
|
+
else
|
|
728
|
+
data.skip_data_rows = atoi(argv[arg_i]);
|
|
729
|
+
} else if (!strcmp(argv[arg_i], "-e")) {
|
|
730
|
+
++arg_i;
|
|
731
|
+
if (data.embedded_lineend)
|
|
732
|
+
stat = zsv_printerr(1, "-e option specified more than once");
|
|
733
|
+
else if (strlen(argv[arg_i]) != 1)
|
|
734
|
+
stat = zsv_printerr(1, "-e option value must be a single character");
|
|
735
|
+
else if (arg_i < argc)
|
|
736
|
+
data.embedded_lineend = *argv[arg_i];
|
|
737
|
+
else
|
|
738
|
+
stat = zsv_printerr(1, "-e option requires a value");
|
|
739
|
+
} else if (!strcmp(argv[arg_i], "-x")) {
|
|
740
|
+
arg_i++;
|
|
741
|
+
if (!(arg_i < argc))
|
|
742
|
+
stat = zsv_printerr(1, "%s option requires a value", argv[arg_i - 1]);
|
|
743
|
+
else
|
|
744
|
+
zsv_select_add_exclusion(&data, argv[arg_i]);
|
|
745
|
+
} else if (*argv[arg_i] == '-')
|
|
746
|
+
stat = zsv_printerr(1, "Unrecognized argument: %s", argv[arg_i]);
|
|
747
|
+
else if (data.opts->stream)
|
|
748
|
+
stat = zsv_printerr(1, "Input file was specified, cannot also read: %s", argv[arg_i]);
|
|
749
|
+
else if (!(data.opts->stream = fopen(argv[arg_i], "rb")))
|
|
750
|
+
stat = zsv_printerr(1, "Could not open for reading: %s", argv[arg_i]);
|
|
751
|
+
else
|
|
752
|
+
input_path = argv[arg_i];
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
if (data.sample_pct)
|
|
756
|
+
srand(time(0));
|
|
757
|
+
|
|
758
|
+
if (data.use_header_indexes && stat == zsv_status_ok)
|
|
759
|
+
stat = zsv_select_check_exclusions_are_indexes(&data);
|
|
760
|
+
|
|
761
|
+
if (!data.opts->stream) {
|
|
762
|
+
#ifdef NO_STDIN
|
|
763
|
+
stat = zsv_printerr(1, "Please specify an input file");
|
|
764
|
+
#else
|
|
765
|
+
data.opts->stream = stdin;
|
|
766
|
+
#endif
|
|
767
|
+
}
|
|
768
|
+
|
|
769
|
+
if (stat == zsv_status_ok) {
|
|
770
|
+
if (!col_index_arg_i)
|
|
771
|
+
data.col_argc = 0;
|
|
772
|
+
else {
|
|
773
|
+
data.col_argv = &argv[col_index_arg_i];
|
|
774
|
+
data.col_argc = argc - col_index_arg_i;
|
|
775
|
+
}
|
|
776
|
+
|
|
777
|
+
data.header_names = calloc(data.opts->max_columns, sizeof(*data.header_names));
|
|
778
|
+
assert(data.opts->max_columns > 0);
|
|
779
|
+
data.out2in = calloc(data.opts->max_columns, sizeof(*data.out2in));
|
|
780
|
+
data.csv_writer = zsv_writer_new(&writer_opts);
|
|
781
|
+
if (!(data.header_names && data.csv_writer))
|
|
782
|
+
stat = zsv_status_memory;
|
|
783
|
+
else {
|
|
784
|
+
zsv_parser parser;
|
|
785
|
+
if (zsv_new_with_properties(data.opts, custom_prop_handler, input_path, &parser) == zsv_status_ok) {
|
|
786
|
+
// all done with
|
|
787
|
+
data.any_clean = !data.no_trim_whitespace || data.clean_white || data.embedded_lineend;
|
|
788
|
+
|
|
789
|
+
// TO DO: support fixed input
|
|
790
|
+
// if (data.fixed.count && zsv_set_fixed_offsets(parser, data.fixed.count, data.fixed.offsets) != zsv_status_ok)
|
|
791
|
+
// data.cancelled = 1;
|
|
792
|
+
|
|
793
|
+
// create a local csv writer buff quoted values
|
|
794
|
+
unsigned char writer_buff[512];
|
|
795
|
+
zsv_writer_set_temp_buff(data.csv_writer, writer_buff, sizeof(writer_buff));
|
|
796
|
+
|
|
797
|
+
// process the input data
|
|
798
|
+
zsv_handle_ctrl_c_signal();
|
|
799
|
+
enum zsv_status status = zsv_next_row(parser);
|
|
800
|
+
if (status == zsv_status_row)
|
|
801
|
+
zsv_select_header_row(&data, parser);
|
|
802
|
+
while ((status = zsv_next_row(parser)) == zsv_status_row)
|
|
803
|
+
zsv_select_data_row(&data, parser);
|
|
804
|
+
zsv_delete(parser);
|
|
805
|
+
}
|
|
806
|
+
}
|
|
807
|
+
}
|
|
808
|
+
zsv_select_cleanup(&data);
|
|
809
|
+
if (writer_opts.stream && writer_opts.stream != stdout)
|
|
810
|
+
fclose(writer_opts.stream);
|
|
811
|
+
return stat;
|
|
812
|
+
}
|