zsv 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +36 -0
- data/LICENSE +21 -0
- data/README.md +311 -0
- data/ext/zsv/common.h +34 -0
- data/ext/zsv/extconf.rb +137 -0
- data/ext/zsv/options.c +126 -0
- data/ext/zsv/options.h +31 -0
- data/ext/zsv/options_internal.h +8 -0
- data/ext/zsv/parser.c +300 -0
- data/ext/zsv/parser.h +62 -0
- data/ext/zsv/row.c +122 -0
- data/ext/zsv/row.h +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
- data/ext/zsv/zsv_ext.c +343 -0
- data/lib/zsv/version.rb +5 -0
- data/lib/zsv.rb +81 -0
- metadata +340 -0
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Liquidaty and the zsv/lib contributors
|
|
3
|
+
* All rights reserved
|
|
4
|
+
*
|
|
5
|
+
* This file is part of zsv/lib, distributed under the license defined at
|
|
6
|
+
* https://opensource.org/licenses/MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#include <stdio.h>
|
|
10
|
+
#include <string.h>
|
|
11
|
+
#include <stdlib.h>
|
|
12
|
+
#include <pthread.h>
|
|
13
|
+
#include <sys/types.h> // off_t
|
|
14
|
+
|
|
15
|
+
#define ZSV_COMMAND count
|
|
16
|
+
#include "zsv_command.h"
|
|
17
|
+
#include <zsv/utils/file.h>
|
|
18
|
+
#include <zsv/utils/os.h> // zsv_get_number_of_cores
|
|
19
|
+
#include "utils/chunk.h"
|
|
20
|
+
|
|
21
|
+
#define ZSV_COUNT_PARALLEL_MIN_BYTES (1024 * 1024 * 2)
|
|
22
|
+
|
|
23
|
+
/* Bookkeeping for one chunk of the input file during a parallel count */
struct zsv_chunk_count_data {
  unsigned int id; /* chunk index, assigned when the worker is set up */

  /* byte range of the input assigned to this chunk */
  size_t start_offset;
  size_t end_offset;

  /* results filled in by process_chunk_internal() and its row handlers */
  size_t actual_next_row_start; /* absolute offset at which processing of this chunk stopped */
  size_t row_count;             /* rows counted within this chunk */
  int status;                   /* 0 on success, zsv_status_error on failure */

  /* inputs shared by all chunks */
  const char *input_path;         /* file that each worker opens for itself */
  struct zsv_opts *opts_template; /* copied by value into the worker's own options */

  int skip; /* set to 1 when start_offset >= end_offset, i.e. nothing to process */
};
|
|
37
|
+
|
|
38
|
+
/* Parallel-run state: one chunk descriptor and one thread handle per chunk */
struct zsv_count_parallel_data {
  unsigned int chunk_count;            /* number of elements in each array below */
  struct zsv_chunk_count_data *chunks; /* per-chunk inputs and results */
  pthread_t *threads;                  /* thread handles, indexed like chunks */
};
|
|
43
|
+
|
|
44
|
+
struct data {
|
|
45
|
+
zsv_parser parser;
|
|
46
|
+
size_t rows;
|
|
47
|
+
|
|
48
|
+
struct zsv_opts *opts;
|
|
49
|
+
const char *input_path;
|
|
50
|
+
unsigned int num_chunks;
|
|
51
|
+
|
|
52
|
+
int run_in_parallel;
|
|
53
|
+
int cancelled;
|
|
54
|
+
#ifndef ZSV_NO_PARALLEL
|
|
55
|
+
struct zsv_count_parallel_data *pdata;
|
|
56
|
+
size_t end_offset_limit; // where this chunk (chunk 0) should stop
|
|
57
|
+
size_t next_row_start; // where chunk 0 actually ended
|
|
58
|
+
#endif
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
#ifndef ZSV_NO_PARALLEL
|
|
62
|
+
static void *process_chunk_internal(struct zsv_chunk_count_data *cdata);
|
|
63
|
+
|
|
64
|
+
static struct zsv_count_parallel_data *parallel_data_new(unsigned int count) {
|
|
65
|
+
struct zsv_count_parallel_data *pd = calloc(1, sizeof(*pd));
|
|
66
|
+
if (!pd)
|
|
67
|
+
return NULL;
|
|
68
|
+
pd->chunk_count = count;
|
|
69
|
+
pd->chunks = calloc(count, sizeof(*pd->chunks));
|
|
70
|
+
pd->threads = calloc(count, sizeof(*pd->threads));
|
|
71
|
+
if (!pd->chunks || !pd->threads) {
|
|
72
|
+
free(pd->chunks);
|
|
73
|
+
free(pd->threads);
|
|
74
|
+
free(pd);
|
|
75
|
+
return NULL;
|
|
76
|
+
}
|
|
77
|
+
return pd;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
static void parallel_data_delete(struct zsv_count_parallel_data *pd) {
|
|
81
|
+
if (pd) {
|
|
82
|
+
free(pd->chunks);
|
|
83
|
+
free(pd->threads);
|
|
84
|
+
free(pd);
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
#endif
|
|
89
|
+
|
|
90
|
+
/* serial (non-parallelized) row handlers */
|
|
91
|
+
static void row_verbose(void *ctx) {
|
|
92
|
+
struct data *data = ctx;
|
|
93
|
+
data->rows++;
|
|
94
|
+
if (data->rows % 1000000 == 0)
|
|
95
|
+
fprintf(stderr, "Processed %zu data rows\n", data->rows / 1000000);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
static void row_simple(void *ctx) {
|
|
99
|
+
((struct data *)ctx)->rows++;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
#ifndef ZSV_NO_PARALLEL
|
|
103
|
+
/* parallelized row handlers */
|
|
104
|
+
static void row_parallel_done(void *ctx) {
|
|
105
|
+
struct data *data = ctx;
|
|
106
|
+
// Find start of the next row
|
|
107
|
+
data->next_row_start = zsv_cum_scanned_length(data->parser) - zsv_row_length_raw_bytes(data->parser);
|
|
108
|
+
zsv_abort(data->parser);
|
|
109
|
+
data->cancelled = 1;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
static void row_parallel(void *ctx) {
|
|
113
|
+
struct data *data = ctx;
|
|
114
|
+
data->rows++;
|
|
115
|
+
|
|
116
|
+
if (UNLIKELY((off_t)zsv_cum_scanned_length(data->parser) >= data->end_offset_limit)) {
|
|
117
|
+
// We crossed the boundary. We must finish this row, then stop.
|
|
118
|
+
// Switch handler to 'done' to catch the exact end of this row.
|
|
119
|
+
zsv_set_row_handler(data->parser, row_parallel_done);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
struct worker_ctx {
|
|
124
|
+
struct zsv_chunk_count_data *cdata;
|
|
125
|
+
zsv_parser parser;
|
|
126
|
+
size_t limit_len;
|
|
127
|
+
int cancelled;
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
static void worker_row_done(void *ctx) {
|
|
131
|
+
struct worker_ctx *wctx = ctx;
|
|
132
|
+
// Calculate absolute offset of the *next* row start
|
|
133
|
+
size_t scanned = zsv_cum_scanned_length(wctx->parser);
|
|
134
|
+
wctx->cdata->actual_next_row_start = wctx->cdata->start_offset + scanned - zsv_row_length_raw_bytes(wctx->parser);
|
|
135
|
+
zsv_abort(wctx->parser);
|
|
136
|
+
wctx->cancelled = 1;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
static void worker_row(void *ctx) {
|
|
140
|
+
struct worker_ctx *wctx = ctx;
|
|
141
|
+
wctx->cdata->row_count++;
|
|
142
|
+
|
|
143
|
+
if (UNLIKELY((off_t)zsv_cum_scanned_length(wctx->parser) >= wctx->limit_len)) {
|
|
144
|
+
zsv_set_row_handler(wctx->parser, worker_row_done);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
/* pthread entry point: `arg` is the struct zsv_chunk_count_data to process */
static void *process_chunk_thread(void *arg) {
  return process_chunk_internal((struct zsv_chunk_count_data *)arg);
}
|
|
152
|
+
|
|
153
|
+
static void *process_chunk_internal(struct zsv_chunk_count_data *cdata) {
|
|
154
|
+
cdata->row_count = 0;
|
|
155
|
+
cdata->status = 0;
|
|
156
|
+
|
|
157
|
+
if (cdata->start_offset >= cdata->end_offset) {
|
|
158
|
+
cdata->actual_next_row_start = cdata->start_offset;
|
|
159
|
+
cdata->skip = 1;
|
|
160
|
+
return NULL;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
struct zsv_opts opts = *cdata->opts_template;
|
|
164
|
+
struct worker_ctx wctx = {0};
|
|
165
|
+
wctx.cdata = cdata;
|
|
166
|
+
wctx.limit_len = cdata->end_offset - cdata->start_offset;
|
|
167
|
+
|
|
168
|
+
FILE *f = fopen(cdata->input_path, "rb");
|
|
169
|
+
if (!f) {
|
|
170
|
+
cdata->status = zsv_status_error;
|
|
171
|
+
return NULL;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
if (fseeko(f, cdata->start_offset, SEEK_SET) != 0) {
|
|
175
|
+
fclose(f);
|
|
176
|
+
cdata->status = zsv_status_error;
|
|
177
|
+
return NULL;
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
opts.stream = f;
|
|
181
|
+
opts.ctx = &wctx;
|
|
182
|
+
opts.row_handler = worker_row;
|
|
183
|
+
|
|
184
|
+
wctx.parser = zsv_new(&opts);
|
|
185
|
+
if (wctx.parser == NULL) {
|
|
186
|
+
fclose(f);
|
|
187
|
+
cdata->status = zsv_status_error;
|
|
188
|
+
return NULL;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
enum zsv_status status = zsv_status_ok;
|
|
192
|
+
while (status == zsv_status_ok && !wctx.cancelled) {
|
|
193
|
+
status = zsv_parse_more(wctx.parser);
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// if finished naturally (eof)
|
|
197
|
+
if (!wctx.cancelled) {
|
|
198
|
+
cdata->actual_next_row_start = cdata->start_offset + zsv_cum_scanned_length(wctx.parser);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
zsv_finish(wctx.parser);
|
|
202
|
+
zsv_delete(wctx.parser);
|
|
203
|
+
fclose(f);
|
|
204
|
+
return NULL;
|
|
205
|
+
}
|
|
206
|
+
#endif
|
|
207
|
+
|
|
208
|
+
static void header_handler(void *ctx) {
|
|
209
|
+
struct data *data = ctx;
|
|
210
|
+
#ifndef ZSV_NO_PARALLEL
|
|
211
|
+
if (data->input_path && data->num_chunks > 1) {
|
|
212
|
+
size_t header_end = zsv_cum_scanned_length(data->parser);
|
|
213
|
+
struct zsv_chunk_position *offsets =
|
|
214
|
+
zsv_guess_file_chunks(data->input_path, data->num_chunks, ZSV_COUNT_PARALLEL_MIN_BYTES, header_end
|
|
215
|
+
#ifndef ZSV_NO_ONLY_CRLF
|
|
216
|
+
,
|
|
217
|
+
data->opts->only_crlf_rowend
|
|
218
|
+
#endif
|
|
219
|
+
);
|
|
220
|
+
|
|
221
|
+
if (offsets) {
|
|
222
|
+
data->pdata = parallel_data_new(data->num_chunks);
|
|
223
|
+
if (!data->pdata) {
|
|
224
|
+
fprintf(stderr, "Out of memory!\n");
|
|
225
|
+
zsv_free_chunks(offsets);
|
|
226
|
+
} else {
|
|
227
|
+
data->run_in_parallel = 1;
|
|
228
|
+
if (data->opts->verbose) {
|
|
229
|
+
for (unsigned int i = 0; i < data->num_chunks; i++) {
|
|
230
|
+
fprintf(stderr, "Chunk %i: %zu - %zu\n", i + 1, offsets[i].start, offsets[i].end);
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
/* set up worker chunks (1..n) */
|
|
235
|
+
for (unsigned int i = 1; i < data->num_chunks; i++) {
|
|
236
|
+
struct zsv_chunk_count_data *c = &data->pdata->chunks[i];
|
|
237
|
+
c->id = i;
|
|
238
|
+
c->start_offset = offsets[i].start;
|
|
239
|
+
c->end_offset = offsets[i].end;
|
|
240
|
+
c->input_path = data->input_path;
|
|
241
|
+
c->opts_template = data->opts;
|
|
242
|
+
|
|
243
|
+
if (pthread_create(&data->pdata->threads[i], NULL, process_chunk_thread, c) != 0) {
|
|
244
|
+
fprintf(stderr, "Error creating thread %d\n", i);
|
|
245
|
+
data->run_in_parallel = 0;
|
|
246
|
+
break;
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
if (data->run_in_parallel) {
|
|
251
|
+
data->end_offset_limit = offsets[0].end;
|
|
252
|
+
zsv_set_row_handler(data->parser, row_parallel);
|
|
253
|
+
data->run_in_parallel = 1;
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
zsv_free_chunks(offsets);
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
#endif
|
|
260
|
+
|
|
261
|
+
if (!data->run_in_parallel) { // single-threaded serial run
|
|
262
|
+
data->run_in_parallel = 0;
|
|
263
|
+
zsv_set_row_handler(data->parser, data->opts->verbose ? row_verbose : row_simple);
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
/* Print the command's usage text to stdout; always returns 0 */
static int count_usage(void) {
  static const char usage[] = "Usage: count [options]\n"
                              "\n"
                              "Options:\n"
                              " -h,--help : show usage\n"
                              " -i,--input <filename> : use specified file input\n"
#ifndef ZSV_NO_PARALLEL
                              " -j,--jobs <n> : number of jobs (parallel threads)\n"
                              " --parallel : use all available cores\n"
#endif
    ;
  /* puts() appends the same trailing newline that "%s\n" would */
  puts(usage);
  return 0;
}
|
|
281
|
+
|
|
282
|
+
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *optsp,
|
|
283
|
+
struct zsv_prop_handler *custom_prop_handler) {
|
|
284
|
+
struct data data = {0};
|
|
285
|
+
struct zsv_opts opts = *optsp;
|
|
286
|
+
data.opts = &opts;
|
|
287
|
+
|
|
288
|
+
int err = 0;
|
|
289
|
+
for (int i = 1; !err && i < argc; i++) {
|
|
290
|
+
const char *arg = argv[i];
|
|
291
|
+
if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
|
|
292
|
+
count_usage();
|
|
293
|
+
goto count_done;
|
|
294
|
+
}
|
|
295
|
+
if (!strcmp(arg, "-i") || !strcmp(arg, "--input") || *arg != '-') {
|
|
296
|
+
err = 1;
|
|
297
|
+
if ((!strcmp(arg, "-i") || !strcmp(arg, "--input")) && ++i >= argc)
|
|
298
|
+
fprintf(stderr, "%s option requires a filename\n", arg);
|
|
299
|
+
else {
|
|
300
|
+
if (opts.stream)
|
|
301
|
+
fprintf(stderr, "Input may not be specified more than once\n");
|
|
302
|
+
else if (!(opts.stream = fopen(argv[i], "rb")))
|
|
303
|
+
fprintf(stderr, "Unable to open for reading: %s\n", argv[i]);
|
|
304
|
+
else {
|
|
305
|
+
data.input_path = argv[i];
|
|
306
|
+
err = 0;
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
#ifndef ZSV_NO_PARALLEL
|
|
310
|
+
} else if (!strcmp(arg, "-j") || !strcmp(arg, "--jobs")) {
|
|
311
|
+
if (++i >= argc)
|
|
312
|
+
err = 1;
|
|
313
|
+
else
|
|
314
|
+
data.num_chunks = atoi(argv[i]);
|
|
315
|
+
} else if (!strcmp(arg, "--parallel")) {
|
|
316
|
+
data.num_chunks = zsv_get_number_of_cores();
|
|
317
|
+
if (data.num_chunks < 2) {
|
|
318
|
+
fprintf(stderr, "Warning: --parallel specified but only one core found; using -j 4 instead");
|
|
319
|
+
data.num_chunks = 4;
|
|
320
|
+
}
|
|
321
|
+
#endif
|
|
322
|
+
} else {
|
|
323
|
+
fprintf(stderr, "Unrecognized option: %s\n", arg);
|
|
324
|
+
err = 1;
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
#ifdef NO_STDIN
|
|
329
|
+
if (!opts.stream || opts.stream == stdin) {
|
|
330
|
+
fprintf(stderr, "Please specify an input file\n");
|
|
331
|
+
err = 1;
|
|
332
|
+
}
|
|
333
|
+
#endif
|
|
334
|
+
#ifndef ZSV_NO_PARALLEL
|
|
335
|
+
if (data.num_chunks > 1) {
|
|
336
|
+
enum zsv_chunk_status chstat = zsv_chunkable(data.input_path, &opts);
|
|
337
|
+
if (chstat != zsv_chunk_status_ok) {
|
|
338
|
+
fprintf(stderr, "%s\n", zsv_chunk_status_str(chstat));
|
|
339
|
+
err = 1;
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
#endif
|
|
343
|
+
if (!err) {
|
|
344
|
+
opts.row_handler = header_handler;
|
|
345
|
+
opts.ctx = &data;
|
|
346
|
+
|
|
347
|
+
if (zsv_new_with_properties(&opts, custom_prop_handler, data.input_path, &data.parser) != zsv_status_ok) {
|
|
348
|
+
fprintf(stderr, "Unable to initialize parser\n");
|
|
349
|
+
err = 1;
|
|
350
|
+
} else {
|
|
351
|
+
enum zsv_status status;
|
|
352
|
+
|
|
353
|
+
/* Main Parse Loop */
|
|
354
|
+
while (!data.cancelled && (status = zsv_parse_more(data.parser)) == zsv_status_ok)
|
|
355
|
+
;
|
|
356
|
+
zsv_finish(data.parser);
|
|
357
|
+
|
|
358
|
+
#ifndef ZSV_NO_PARALLEL
|
|
359
|
+
if (data.run_in_parallel) {
|
|
360
|
+
if (!data.next_row_start)
|
|
361
|
+
// not likely to get here but just in case
|
|
362
|
+
data.next_row_start = zsv_cum_scanned_length(data.parser);
|
|
363
|
+
|
|
364
|
+
size_t total_rows = data.rows;
|
|
365
|
+
// aggregate results
|
|
366
|
+
for (unsigned int i = 1; i < data.num_chunks; i++) {
|
|
367
|
+
pthread_join(data.pdata->threads[i], NULL);
|
|
368
|
+
|
|
369
|
+
struct zsv_chunk_count_data *prev_chunk = (i == 1) ? NULL : &data.pdata->chunks[i - 1];
|
|
370
|
+
struct zsv_chunk_count_data *curr_chunk = &data.pdata->chunks[i];
|
|
371
|
+
|
|
372
|
+
// determine where the previous chunk actually ended
|
|
373
|
+
size_t prev_end = (i == 1) ? data.next_row_start : prev_chunk->actual_next_row_start;
|
|
374
|
+
// check overlap
|
|
375
|
+
if (prev_end > curr_chunk->start_offset) {
|
|
376
|
+
if (data.opts->verbose) {
|
|
377
|
+
fprintf(stderr, "Overlap detected at chunk %u (expected %zu, got %zu). Reprocessing.\n", i,
|
|
378
|
+
curr_chunk->start_offset, prev_end);
|
|
379
|
+
}
|
|
380
|
+
// reprocess synchronously
|
|
381
|
+
curr_chunk->start_offset = prev_end;
|
|
382
|
+
process_chunk_internal(curr_chunk);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
total_rows += curr_chunk->row_count;
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
printf("%zu\n", total_rows);
|
|
389
|
+
parallel_data_delete(data.pdata);
|
|
390
|
+
|
|
391
|
+
} else
|
|
392
|
+
#endif
|
|
393
|
+
// result from running serially
|
|
394
|
+
printf("%zu\n", data.rows);
|
|
395
|
+
zsv_delete(data.parser);
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
count_done:
|
|
400
|
+
if (opts.stream && opts.stream != stdin)
|
|
401
|
+
fclose(opts.stream);
|
|
402
|
+
|
|
403
|
+
return err;
|
|
404
|
+
}
|