zsv 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +36 -0
- data/LICENSE +21 -0
- data/README.md +311 -0
- data/ext/zsv/common.h +34 -0
- data/ext/zsv/extconf.rb +137 -0
- data/ext/zsv/options.c +126 -0
- data/ext/zsv/options.h +31 -0
- data/ext/zsv/options_internal.h +8 -0
- data/ext/zsv/parser.c +300 -0
- data/ext/zsv/parser.h +62 -0
- data/ext/zsv/row.c +122 -0
- data/ext/zsv/row.h +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
- data/ext/zsv/zsv_ext.c +343 -0
- data/lib/zsv/version.rb +5 -0
- data/lib/zsv.rb +81 -0
- metadata +340 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
#include <stdio.h>
|
|
2
|
+
#include <string.h>
|
|
3
|
+
#include <stdlib.h>
|
|
4
|
+
#include <unistd.h> // unlink()
|
|
5
|
+
|
|
6
|
+
#include <errno.h>
|
|
7
|
+
#include <zsv/utils/cache.h>
|
|
8
|
+
#include <zsv/utils/jq.h>
|
|
9
|
+
|
|
10
|
+
#ifndef APPNAME
|
|
11
|
+
#define APPNAME "cache"
|
|
12
|
+
#endif
|
|
13
|
+
|
|
14
|
+
#include <zsv/utils/os.h>
|
|
15
|
+
#include <zsv/utils/err.h>
|
|
16
|
+
#include <zsv/utils/dirs.h>
|
|
17
|
+
#include <zsv/utils/file.h>
|
|
18
|
+
|
|
19
|
+
static const char *zsv_cache_type_name(enum zsv_cache_type t) {
|
|
20
|
+
switch (t) {
|
|
21
|
+
case zsv_cache_type_property:
|
|
22
|
+
return ZSV_CACHE_PROPERTIES_NAME ".json";
|
|
23
|
+
case zsv_cache_type_tag:
|
|
24
|
+
return "tag.json";
|
|
25
|
+
case zsv_cache_type_overwrite:
|
|
26
|
+
return "overwrite.sqlite3";
|
|
27
|
+
default:
|
|
28
|
+
return NULL;
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
unsigned char *zsv_cache_filepath(const unsigned char *data_filepath, enum zsv_cache_type type, char create_dir,
|
|
33
|
+
char temp_file) {
|
|
34
|
+
if (!data_filepath || !*data_filepath)
|
|
35
|
+
return NULL;
|
|
36
|
+
|
|
37
|
+
const char *cache_filename_base = zsv_cache_type_name(type);
|
|
38
|
+
if (!cache_filename_base) {
|
|
39
|
+
zsv_printerr(ENOMEM, "Out of memory!");
|
|
40
|
+
return NULL;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
unsigned char *cache_filename;
|
|
44
|
+
asprintf((char **)&cache_filename, "%s%s", cache_filename_base, temp_file ? ZSV_TEMPFILE_SUFFIX : "");
|
|
45
|
+
|
|
46
|
+
unsigned char *s = cache_filename ? zsv_cache_path(data_filepath, cache_filename, 0) : NULL;
|
|
47
|
+
if (s && create_dir) {
|
|
48
|
+
char *last_slash_s = (char *)strrchr((void *)s, FILESLASH);
|
|
49
|
+
int err = 0;
|
|
50
|
+
|
|
51
|
+
// temporarily truncate the string so as to only leave its parent folder
|
|
52
|
+
*last_slash_s = '\0';
|
|
53
|
+
|
|
54
|
+
// ensure the parent dir exists
|
|
55
|
+
if (!zsv_dir_exists((char *)s))
|
|
56
|
+
err = zsv_mkdirs((char *)s, 0);
|
|
57
|
+
if (err) {
|
|
58
|
+
fprintf(stderr, "Unable to create cache directory %s\n", s);
|
|
59
|
+
free(s);
|
|
60
|
+
s = NULL;
|
|
61
|
+
} else
|
|
62
|
+
*last_slash_s = FILESLASH;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
free(cache_filename);
|
|
66
|
+
return s;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/*
|
|
70
|
+
* print cache file to stdout
|
|
71
|
+
*/
|
|
72
|
+
int zsv_cache_print(const unsigned char *filepath, enum zsv_cache_type ctype, const unsigned char *default_value) {
|
|
73
|
+
int err = 0;
|
|
74
|
+
// to do: parse the json rather than just blindly regurgitating the file
|
|
75
|
+
unsigned char *cache_fn = zsv_cache_filepath(filepath, ctype, 0, 0);
|
|
76
|
+
if (cache_fn) {
|
|
77
|
+
FILE *f;
|
|
78
|
+
if (zsv_file_readable((char *)cache_fn, &err, &f)) {
|
|
79
|
+
char buff[1024];
|
|
80
|
+
size_t bytes;
|
|
81
|
+
while ((bytes = fread(buff, 1, sizeof(buff), f)))
|
|
82
|
+
fwrite(buff, 1, bytes, stdout);
|
|
83
|
+
fclose(f);
|
|
84
|
+
} else if (err == ENOENT) {
|
|
85
|
+
if (default_value)
|
|
86
|
+
printf("%s\n", default_value);
|
|
87
|
+
} else {
|
|
88
|
+
perror((const char *)cache_fn);
|
|
89
|
+
if (!err)
|
|
90
|
+
err = 1;
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
free(cache_fn);
|
|
94
|
+
return err;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/*
|
|
98
|
+
* remove a cache file
|
|
99
|
+
*/
|
|
100
|
+
|
|
101
|
+
int zsv_cache_remove(const unsigned char *filepath, enum zsv_cache_type ctype) {
|
|
102
|
+
int err = 0;
|
|
103
|
+
unsigned char *fn = zsv_cache_filepath(filepath, ctype, 0, 0);
|
|
104
|
+
if (!fn)
|
|
105
|
+
err = ENOMEM;
|
|
106
|
+
else if (zsv_file_readable((const char *)fn, &err, NULL)) {
|
|
107
|
+
err = unlink((const char *)fn);
|
|
108
|
+
if (err)
|
|
109
|
+
perror((const char *)fn);
|
|
110
|
+
} else if (err == ENOENT)
|
|
111
|
+
err = 0; // file d.n. exist, nothing to do
|
|
112
|
+
else
|
|
113
|
+
perror((const char *)fn);
|
|
114
|
+
free(fn);
|
|
115
|
+
return err;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/*
|
|
119
|
+
* modify a JSON cache file, write to tmp file, then replace the cache file
|
|
120
|
+
*/
|
|
121
|
+
int zsv_modify_cache_file(const unsigned char *filepath, enum zsv_cache_type ctype, const unsigned char *json_value1,
|
|
122
|
+
const unsigned char *json_value2, const unsigned char *filter) {
|
|
123
|
+
unsigned char *cache_fn = zsv_cache_filepath((const unsigned char *)filepath, ctype, 0, 0);
|
|
124
|
+
unsigned char *cache_tmp_fn = zsv_cache_filepath((const unsigned char *)filepath, ctype, 1, 1);
|
|
125
|
+
FILE *cache_data = NULL;
|
|
126
|
+
if (!(cache_fn && cache_tmp_fn))
|
|
127
|
+
return zsv_printerr(ENOMEM, "Out of memory!");
|
|
128
|
+
|
|
129
|
+
cache_data = zsv_fopen((void *)cache_fn, "rb");
|
|
130
|
+
int err = 0;
|
|
131
|
+
if (!cache_data) {
|
|
132
|
+
err = errno;
|
|
133
|
+
if (err == ENOENT)
|
|
134
|
+
err = 0;
|
|
135
|
+
else { // file exists but could not be opened
|
|
136
|
+
perror((const char *)cache_fn);
|
|
137
|
+
return err;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
if (cache_data) {
|
|
142
|
+
// check that we have at least 1 byte of data
|
|
143
|
+
fseek(cache_data, 1, SEEK_SET);
|
|
144
|
+
if (!ftell(cache_data)) { // empty file; will use default value of "{}"
|
|
145
|
+
fclose(cache_data);
|
|
146
|
+
cache_data = NULL;
|
|
147
|
+
} else
|
|
148
|
+
fseek(cache_data, 0, SEEK_SET);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
// jq filter to apply to [current_properties, id, value]
|
|
152
|
+
FILE *tmp = zsv_fopen((const char *)cache_tmp_fn, "wb");
|
|
153
|
+
if (!tmp) {
|
|
154
|
+
if (!(err = errno))
|
|
155
|
+
err = 1;
|
|
156
|
+
perror((const char *)cache_tmp_fn);
|
|
157
|
+
} else {
|
|
158
|
+
struct jv_to_json_ctx ctx = {0};
|
|
159
|
+
ctx.write1 = zsv_jq_fwrite1;
|
|
160
|
+
ctx.ctx = tmp;
|
|
161
|
+
ctx.flags = JV_PRINT_PRETTY | JV_PRINT_SPACE1;
|
|
162
|
+
enum zsv_jq_status jqstat;
|
|
163
|
+
void *jqh = zsv_jq_new(filter, jv_to_json_func, &ctx, &jqstat);
|
|
164
|
+
if (jqstat || !jqh)
|
|
165
|
+
err = zsv_printerr(-1, "Unable to initialize jq filter");
|
|
166
|
+
else if (!(jqstat = zsv_jq_parse(jqh, "[", 1))) {
|
|
167
|
+
if (cache_data)
|
|
168
|
+
jqstat = zsv_jq_parse_file(jqh, cache_data);
|
|
169
|
+
else
|
|
170
|
+
jqstat = zsv_jq_parse(jqh, "{}", 2);
|
|
171
|
+
if (!jqstat && !(jqstat = zsv_jq_parse(jqh, ",", 1)) &&
|
|
172
|
+
!(jqstat = zsv_jq_parse(jqh, json_value1, strlen((void *)json_value1))) &&
|
|
173
|
+
!(jqstat = zsv_jq_parse(jqh, ",", 1)) &&
|
|
174
|
+
!(jqstat = zsv_jq_parse(jqh, json_value2, strlen((void *)json_value2))) &&
|
|
175
|
+
!(jqstat = zsv_jq_parse(jqh, "]", 1)) && !(jqstat = zsv_jq_finish(jqh))) {
|
|
176
|
+
;
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
zsv_jq_delete(jqh);
|
|
180
|
+
|
|
181
|
+
if (cache_data) {
|
|
182
|
+
fclose(cache_data);
|
|
183
|
+
cache_data = NULL;
|
|
184
|
+
}
|
|
185
|
+
fclose(tmp);
|
|
186
|
+
|
|
187
|
+
if (!jqstat && zsv_replace_file(cache_tmp_fn, cache_fn)) {
|
|
188
|
+
err = zsv_printerr(-1, "Unable to save %s: ", cache_fn);
|
|
189
|
+
zsv_perror(NULL);
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
if (cache_data)
|
|
194
|
+
fclose(cache_data);
|
|
195
|
+
free(cache_fn);
|
|
196
|
+
free(cache_tmp_fn);
|
|
197
|
+
return err;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
/**
|
|
201
|
+
* Returns the folder or file path to the cache for a given data file
|
|
202
|
+
* Caller must free the returned result
|
|
203
|
+
*/
|
|
204
|
+
unsigned char *zsv_cache_path(const unsigned char *data_filepath, const unsigned char *cache_filename, char temp_file) {
|
|
205
|
+
if (!data_filepath)
|
|
206
|
+
return NULL;
|
|
207
|
+
const unsigned char *last_slash = (void *)strrchr((void *)data_filepath, '/');
|
|
208
|
+
const unsigned char *last_backslash = (void *)strrchr((void *)data_filepath, '\\');
|
|
209
|
+
const unsigned char *dir_end = (!last_slash && !last_backslash ? NULL
|
|
210
|
+
: last_backslash > last_slash ? last_backslash
|
|
211
|
+
: last_slash);
|
|
212
|
+
char *s = NULL;
|
|
213
|
+
char *filename_suffix = NULL;
|
|
214
|
+
if (cache_filename)
|
|
215
|
+
asprintf(&filename_suffix, "%c%s%s", FILESLASH, cache_filename, temp_file ? ZSV_TEMPFILE_SUFFIX : "");
|
|
216
|
+
|
|
217
|
+
if (!dir_end) // file is in current dir
|
|
218
|
+
asprintf(&s, ZSV_CACHE_DIR "%c%s%s", FILESLASH, data_filepath, filename_suffix ? filename_suffix : "");
|
|
219
|
+
else if (dir_end[1]) {
|
|
220
|
+
asprintf(&s, "%.*s%c" ZSV_CACHE_DIR "%c%s%s", (int)(dir_end - data_filepath), data_filepath, FILESLASH, FILESLASH,
|
|
221
|
+
dir_end + 1, filename_suffix ? filename_suffix : "");
|
|
222
|
+
for (int i = 0; s && s[i]; i++)
|
|
223
|
+
if (s[i] != FILESLASH && (s[i] == '/' || s[i] == '\\'))
|
|
224
|
+
s[i] = FILESLASH;
|
|
225
|
+
}
|
|
226
|
+
free(filename_suffix);
|
|
227
|
+
return (unsigned char *)s;
|
|
228
|
+
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
#include <stdio.h>
|
|
2
|
+
#include <sys/types.h>
|
|
3
|
+
#include <fcntl.h>
|
|
4
|
+
#include <unistd.h>
|
|
5
|
+
#include <stdlib.h> // malloc/free
|
|
6
|
+
#include <errno.h> // error reporting
|
|
7
|
+
|
|
8
|
+
// Define a reasonable buffer size for the buffered copy
|
|
9
|
+
#define COPY_BUFFER_SIZE (1024 * 64) // 64KB
|
|
10
|
+
|
|
11
|
+
#ifdef _WIN32
|
|
12
|
+
// Windows target (via mingw64)
|
|
13
|
+
#include <io.h> // _get_osfhandle
|
|
14
|
+
#include <windows.h> // HANDLE, ReadFile, WriteFile
|
|
15
|
+
#else
|
|
16
|
+
// POSIX target (Linux/macOS)
|
|
17
|
+
#include <sys/stat.h> // fstat, stat
|
|
18
|
+
// #include <sys/uio.h> // macOS/BSD sendfile() definition
|
|
19
|
+
#ifdef __linux__
|
|
20
|
+
#include <sys/sendfile.h> // only on Linux
|
|
21
|
+
#endif
|
|
22
|
+
#endif
|
|
23
|
+
|
|
24
|
+
// concatenate two files. if possible, use zero-copy via sendfile
//
// Copies from in_fd to out_fd and returns the number of bytes written, or -1 on
// error. How `size` is used depends on the platform branch:
//  - windows: exactly `size` bytes must be copied, else -1 is returned
//  - linux:   up to `size` bytes are copied starting at offset 0 of in_fd
//             (in_fd's own file position is not used); a negative `size`
//             copies until end of input
//  - other:   `size` is ignored; in_fd is copied from its current position
//             until end of input
long zsv_concatenate_copy(int out_fd, int in_fd, off_t size) {
  long total_written = 0;

#ifdef _WIN32
  // --- windows: buffered copy via native apis
  HANDLE hOut = (HANDLE)_get_osfhandle(out_fd);
  HANDLE hIn = (HANDLE)_get_osfhandle(in_fd);
  if (hOut == INVALID_HANDLE_VALUE || hIn == INVALID_HANDLE_VALUE)
    return -1;

  char *buffer = malloc(COPY_BUFFER_SIZE);
  if (!buffer)
    return -1;

  DWORD bytes_read, bytes_written;
  BOOL result;

  while (total_written < size) {
    DWORD bytes_to_read =
      (DWORD)((size - total_written < COPY_BUFFER_SIZE) ? (size - total_written) : COPY_BUFFER_SIZE);

    result = ReadFile(hIn, buffer, bytes_to_read, &bytes_read, NULL);
    if (!result || bytes_read == 0) {
      free(buffer);
      return -1;
    }

    result = WriteFile(hOut, buffer, bytes_read, &bytes_written, NULL);
    if (!result || bytes_written != bytes_read) {
      free(buffer);
      return -1;
    }

    total_written += bytes_written;
  }

  free(buffer);
  return total_written;

#elif defined(__linux__)
  // --- linux: zero-copy! ---
  // A single sendfile() call may transfer fewer bytes than requested (and is
  // capped at 0x7ffff000 bytes), so keep going until `size` bytes have been
  // sent or the input is exhausted
  const size_t max_per_call = 0x7ffff000;
  off_t offset = 0; // updated by sendfile() to the byte following the last one read
  while (size < 0 || offset < size) {
    size_t count = max_per_call;
    if (size >= 0 && size - offset < (off_t)max_per_call)
      count = (size_t)(size - offset);

    // sendfile: target_fd, source_fd, offset*, count
    ssize_t sent = sendfile(out_fd, in_fd, &offset, count);
    if (sent < 0) {
      if (errno == EINTR)
        continue; // interrupted before any data was transferred: retry
      return -1;
    }
    if (sent == 0)
      break; // end of input
    total_written += (long)sent;
  }
  return total_written;

#else
  (void)(size);
  // --- generic posix fallback (buffered copy) ---
  char *buffer = malloc(COPY_BUFFER_SIZE);
  if (!buffer)
    return -1;

  ssize_t read_bytes;
  while ((read_bytes = read(in_fd, buffer, COPY_BUFFER_SIZE)) > 0) {
    // write() may accept only part of the buffer; keep writing the remainder
    ssize_t flushed = 0;
    while (flushed < read_bytes) {
      ssize_t write_bytes = write(out_fd, buffer + flushed, (size_t)(read_bytes - flushed));
      if (write_bytes < 0 && errno == EINTR)
        continue;
      if (write_bytes <= 0) {
        free(buffer);
        return -1;
      }
      flushed += write_bytes;
    }
    total_written += flushed;
  }
  free(buffer);
  return (read_bytes == 0) ? total_written : -1;
#endif
}
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
// /src/app/utils/chunk.c: implements /src/app/utils/chunk.h
|
|
2
|
+
|
|
3
|
+
#include <sys/stat.h>
|
|
4
|
+
#include <stdio.h>
|
|
5
|
+
#include <stdlib.h>
|
|
6
|
+
#include <string.h>
|
|
7
|
+
|
|
8
|
+
#include "chunk.h"
|
|
9
|
+
|
|
10
|
+
/**
 * @brief Tells whether a character is a line-ending character.
 * @param c The character to test.
 * @return int 1 for '\n' or '\r', 0 for anything else.
 */
static int zsv_is_newline(char c) {
  switch (c) {
  case '\n':
  case '\r':
    return 1;
  default:
    return 0;
  }
}
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* @brief Scans forward from an initial offset to find the first position after a newline sequence.
|
|
21
|
+
*
|
|
22
|
+
* @param fp The open file pointer.
|
|
23
|
+
* @param initial_offset The starting point of the search (nominal boundary).
|
|
24
|
+
* @param boundary The absolute maximum file size (total_size).
|
|
25
|
+
* @param only_crlf If non-zero, only treat \r\n as a newline.
|
|
26
|
+
* @return zsv_file_pos The position after the newline sequence, or -1 if not found.
|
|
27
|
+
*/
|
|
28
|
+
static zsv_file_pos zsv_find_chunk_start(FILE *fp, zsv_file_pos initial_offset, zsv_file_pos boundary, int only_crlf) {
|
|
29
|
+
char c;
|
|
30
|
+
// Seek to the initial offset.
|
|
31
|
+
if (fseek(fp, initial_offset, SEEK_SET) != 0) {
|
|
32
|
+
return -1; // Seek error
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// Scan forward for the start of a newline sequence
|
|
36
|
+
while (ftell(fp) < boundary && fread(&c, 1, 1, fp) == 1) {
|
|
37
|
+
if (only_crlf) {
|
|
38
|
+
if (c == '\r') {
|
|
39
|
+
// We found a CR. Check immediately if the next char is LF.
|
|
40
|
+
char next;
|
|
41
|
+
if (ftell(fp) < boundary && fread(&next, 1, 1, fp) == 1) {
|
|
42
|
+
if (next == '\n') {
|
|
43
|
+
// Found \r\n sequence. The chunk starts immediately after.
|
|
44
|
+
return ftell(fp);
|
|
45
|
+
}
|
|
46
|
+
// The next char was NOT \n.
|
|
47
|
+
// We must rewind one byte so the loop processes 'next' correctly
|
|
48
|
+
// (in case 'next' is itself a \r starting a valid sequence).
|
|
49
|
+
fseek(fp, -1, SEEK_CUR);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
} else {
|
|
53
|
+
if (zsv_is_newline(c)) {
|
|
54
|
+
// Found the start of a sequence. Scan past all consecutive newline characters.
|
|
55
|
+
zsv_file_pos position_after_newline = ftell(fp);
|
|
56
|
+
|
|
57
|
+
while (position_after_newline < boundary && fread(&c, 1, 1, fp) == 1) {
|
|
58
|
+
if (zsv_is_newline(c)) {
|
|
59
|
+
position_after_newline = ftell(fp); // Keep tracking position past the sequence
|
|
60
|
+
} else {
|
|
61
|
+
// Found the first non-newline character.
|
|
62
|
+
// The new start is at the current position (one byte past the last read)
|
|
63
|
+
// so we return the start of that character (ftell - 1).
|
|
64
|
+
return ftell(fp) - 1;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
// If inner loop breaks due to EOF, return -1
|
|
68
|
+
return -1;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// Reached EOF/boundary without finding a valid split point
|
|
74
|
+
return -1;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/*
 * Open `filename`, seek to `offset` and read one line (via fgets) into `buffer`
 * of capacity `buf_size`, with a trailing "\n" and/or "\r" removed.
 * If nothing can be read because end of file was reached, buffer is set to "".
 * Returns 0 on success, -1 on failure (a message is written to stderr).
 */
static int zsv_read_first_line_at_offset(const char *filename, zsv_file_pos offset, char *buffer, size_t buf_size) {
  FILE *in = fopen(filename, "rb");
  if (!in) {
    perror("zsv_read_first_line_at_offset: Failed to open file");
    return -1;
  }

  int rc = -1;
  if (offset < 0 || fseek(in, offset, SEEK_SET) != 0) {
    fprintf(stderr, "zsv_read_first_line_at_offset: Error: Invalid offset or fseek failed at %lld\n",
            (long long)offset);
  } else {
    char *line = fgets(buffer, (int)buf_size, in);
    if (!line && !feof(in)) {
      perror("zsv_read_first_line_at_offset: fgets failed");
    } else {
      if (!line)
        buffer[0] = '\0'; // Empty chunk

      // Drop a trailing LF, then a trailing CR (covers "\n", "\r\n" and a lone trailing "\r")
      size_t n = strlen(buffer);
      if (n && buffer[n - 1] == '\n')
        buffer[--n] = '\0';
      if (n && buffer[n - 1] == '\r')
        buffer[n - 1] = '\0';
      rc = 0;
    }
  }

  fclose(in);
  return rc;
}
|
|
118
|
+
|
|
119
|
+
// --- Public Library Implementations ---
|
|
120
|
+
|
|
121
|
+
struct zsv_chunk_position *zsv_guess_file_chunks(const char *filename, uint64_t N, uint64_t min_size,
|
|
122
|
+
zsv_file_pos initial_offset
|
|
123
|
+
#ifndef ZSV_NO_ONLY_CRLF
|
|
124
|
+
,
|
|
125
|
+
int only_crlf
|
|
126
|
+
#endif
|
|
127
|
+
) {
|
|
128
|
+
|
|
129
|
+
#ifdef ZSV_NO_ONLY_CRLF
|
|
130
|
+
int only_crlf = 0;
|
|
131
|
+
#endif
|
|
132
|
+
if (N == 0)
|
|
133
|
+
return NULL;
|
|
134
|
+
|
|
135
|
+
// Open in binary mode ('rb') is crucial for accurate byte counts.
|
|
136
|
+
FILE *fp = fopen(filename, "rb");
|
|
137
|
+
if (fp == NULL) {
|
|
138
|
+
perror("zsv_guess_file_chunks: Failed to open file");
|
|
139
|
+
return NULL;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// 1. Get total file size using fstat()
|
|
143
|
+
struct stat st;
|
|
144
|
+
if (fstat(fileno(fp), &st) == -1) {
|
|
145
|
+
perror("zsv_guess_file_chunks: fstat failed");
|
|
146
|
+
fclose(fp);
|
|
147
|
+
return NULL;
|
|
148
|
+
}
|
|
149
|
+
zsv_file_pos total_size = (zsv_file_pos)st.st_size;
|
|
150
|
+
if (total_size < initial_offset) {
|
|
151
|
+
perror("zsv_guess_file_chunks: initial_offset exceeds file size");
|
|
152
|
+
fclose(fp);
|
|
153
|
+
return NULL;
|
|
154
|
+
}
|
|
155
|
+
total_size -= initial_offset;
|
|
156
|
+
|
|
157
|
+
if (total_size < (zsv_file_pos)min_size) {
|
|
158
|
+
fprintf(stderr, "file size too small for parallelization\n");
|
|
159
|
+
fclose(fp);
|
|
160
|
+
return NULL;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// Allocate memory for the N chunk positions
|
|
164
|
+
struct zsv_chunk_position *chunks = (struct zsv_chunk_position *)malloc(N * sizeof(*chunks));
|
|
165
|
+
if (chunks == NULL) {
|
|
166
|
+
perror("zsv_guess_file_chunks: malloc failed");
|
|
167
|
+
fclose(fp);
|
|
168
|
+
return NULL;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
if (initial_offset)
|
|
172
|
+
fseek(fp, initial_offset, SEEK_SET);
|
|
173
|
+
|
|
174
|
+
zsv_file_pos base_size = total_size / N;
|
|
175
|
+
zsv_file_pos current_offset = initial_offset;
|
|
176
|
+
|
|
177
|
+
for (uint64_t i = 0; i < N; ++i) {
|
|
178
|
+
chunks[i].start = current_offset;
|
|
179
|
+
|
|
180
|
+
// Calculate the initial nominal boundary for this chunk
|
|
181
|
+
zsv_file_pos nominal_boundary = (i == N - 1) ? total_size : (zsv_file_pos)((i + 1) * base_size);
|
|
182
|
+
|
|
183
|
+
if (i < N - 1) {
|
|
184
|
+
// Adjust the boundary for all but the last chunk
|
|
185
|
+
// Pass the only_crlf flag down to the helper
|
|
186
|
+
zsv_file_pos new_start_offset = zsv_find_chunk_start(fp, nominal_boundary, total_size, only_crlf);
|
|
187
|
+
|
|
188
|
+
if (new_start_offset < 0) {
|
|
189
|
+
// Warning: Could not find a valid split after nominal boundary
|
|
190
|
+
// We use the nominal boundary, which might break a line
|
|
191
|
+
chunks[i].end = nominal_boundary - 1;
|
|
192
|
+
current_offset = nominal_boundary;
|
|
193
|
+
} else {
|
|
194
|
+
chunks[i].end = new_start_offset - 1;
|
|
195
|
+
current_offset = new_start_offset;
|
|
196
|
+
}
|
|
197
|
+
} else {
|
|
198
|
+
// The last chunk always ends at the total_size - 1 byte
|
|
199
|
+
chunks[i].end = total_size + initial_offset > 0 ? total_size + initial_offset - 1 : 0;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
// Defensive check for inverted start/end
|
|
203
|
+
if (chunks[i].start > chunks[i].end && total_size > 0)
|
|
204
|
+
chunks[i].end = chunks[i].start;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
fclose(fp);
|
|
208
|
+
return chunks;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
/**
 * Release a chunk array returned by zsv_guess_file_chunks().
 *
 * @param chunks array to free; NULL is accepted and ignored (free(NULL) is a no-op)
 */
void zsv_free_chunks(struct zsv_chunk_position *chunks) {
  free(chunks);
}
|
|
216
|
+
|
|
217
|
+
const char *zsv_chunk_status_str(enum zsv_chunk_status stat) {
|
|
218
|
+
switch (stat) {
|
|
219
|
+
case zsv_chunk_status_ok:
|
|
220
|
+
return NULL;
|
|
221
|
+
case zsv_chunk_status_no_file_input:
|
|
222
|
+
return "Parallelization requires a file input";
|
|
223
|
+
case zsv_chunk_status_overwrite:
|
|
224
|
+
return "Parallelization cannot be used with overwrite";
|
|
225
|
+
case zsv_chunk_status_max_rows:
|
|
226
|
+
return "Parallelization cannot be used with -L,--limit-rows";
|
|
227
|
+
}
|
|
228
|
+
return NULL;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
enum zsv_chunk_status zsv_chunkable(const char *inputpath, struct zsv_opts *opts) {
|
|
232
|
+
if (!inputpath)
|
|
233
|
+
return zsv_chunk_status_no_file_input;
|
|
234
|
+
struct zsv_opt_overwrite o = {0};
|
|
235
|
+
if (memcmp(&opts->overwrite, &o, sizeof(o)) || opts->overwrite_auto)
|
|
236
|
+
return zsv_chunk_status_overwrite;
|
|
237
|
+
if (opts->max_rows)
|
|
238
|
+
return zsv_chunk_status_max_rows;
|
|
239
|
+
return zsv_chunk_status_ok;
|
|
240
|
+
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#ifndef ZSV_CHUNK_H
|
|
2
|
+
#define ZSV_CHUNK_H
|
|
3
|
+
|
|
4
|
+
#include <zsv/common.h> // struct zsv_opts
|
|
5
|
+
#include <stddef.h> // For size_t
|
|
6
|
+
#include <stdint.h> // For uint64_t
|
|
7
|
+
#include <sys/types.h> // For off_t
|
|
8
|
+
|
|
9
|
+
typedef off_t zsv_file_pos;
|
|
10
|
+
|
|
11
|
+
// Define a struct to hold the (start, end) pair using the standard zsv_file_pos type
|
|
12
|
+
struct zsv_chunk_position {
|
|
13
|
+
zsv_file_pos start; // absolute file offset of the first byte of the chunk
|
|
14
|
+
zsv_file_pos end; // absolute file offset of the last byte of the chunk (inclusive)
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* @brief Divide a file into N chunks for parallel processing.
|
|
19
|
+
*
|
|
20
|
+
* Scans the file to find N approximately equal sections, ensuring that
|
|
21
|
+
* chunk boundaries align with newline sequences so rows are not split.
|
|
22
|
+
*
|
|
23
|
+
* @param filename Path to the file to be chunked.
|
|
24
|
+
* @param N The target number of chunks.
|
|
25
|
+
* @param min_size The minimum file size required to attempt parallelization.
|
|
26
|
+
* @param initial_offset The byte offset to start chunking from (usually 0).
|
|
27
|
+
* @param only_crlf If non-zero, boundaries are split strictly on \r\n sequences.
|
|
28
|
+
* If zero, \r or \n are accepted as boundaries.
|
|
29
|
+
* @return struct zsv_chunk_position* An array of N chunk positions (caller must release it with zsv_free_chunks()),
|
|
30
|
+
* or NULL if the file cannot be chunked or an error occurs.
|
|
31
|
+
*/
|
|
32
|
+
struct zsv_chunk_position *zsv_guess_file_chunks(const char *filename, uint64_t N, uint64_t min_size,
|
|
33
|
+
zsv_file_pos initial_offset
|
|
34
|
+
#ifndef ZSV_NO_ONLY_CRLF
|
|
35
|
+
,
|
|
36
|
+
int only_crlf
|
|
37
|
+
#endif
|
|
38
|
+
);
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* @brief Frees the array returned by zsv_guess_file_chunks.
|
|
42
|
+
* @param chunks The pointer to the allocated chunk array.
|
|
43
|
+
*/
|
|
44
|
+
void zsv_free_chunks(struct zsv_chunk_position *chunks);
|
|
45
|
+
|
|
46
|
+
enum zsv_chunk_status {
|
|
47
|
+
zsv_chunk_status_ok = 0, // chunking can be used
|
|
48
|
+
zsv_chunk_status_no_file_input, // no input file path was given
|
|
49
|
+
zsv_chunk_status_overwrite, // overwrite options are set
|
|
50
|
+
zsv_chunk_status_max_rows // a row limit (max_rows) is set
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* zsv_chunkable(): check if chunking is compatible with options; return chunk_status
|
|
55
|
+
*/
|
|
56
|
+
enum zsv_chunk_status zsv_chunkable(const char *inputpath, struct zsv_opts *opts);
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Convert zsv_chunk_status to string description
|
|
60
|
+
*/
|
|
61
|
+
const char *zsv_chunk_status_str(enum zsv_chunk_status stat);
|
|
62
|
+
|
|
63
|
+
#endif // ZSV_CHUNK_H
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Liquidaty and the zsv/lib contributors
|
|
3
|
+
* All rights reserved
|
|
4
|
+
*
|
|
5
|
+
* This file is part of zsv/lib, distributed under the license defined at
|
|
6
|
+
* https://opensource.org/licenses/MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#include <zsv/utils/clock.h>
|
|
10
|
+
|
|
11
|
+
clock_t zsv_clock_begin; // clock() value recorded by zsv_clocks_begin()
|
|
12
|
+
clock_t zsv_clock_in; // clock ticks accumulated inside fread() by zsv_fread_clock()
|
|
13
|
+
clock_t zsv_clock_out; // clock ticks accumulated inside fwrite()/fflush() by the write-side wrappers
|
|
14
|
+
int i_tmp; // NOTE(review): not referenced in this file; presumably used from clock.h -- confirm
|
|
15
|
+
|
|
16
|
+
size_t zsv_fread_clock(void *restrict ptr, size_t size, size_t nitems, FILE *restrict stream) {
|
|
17
|
+
clock_t clock_tmp = clock();
|
|
18
|
+
size_t sz = fread(ptr, size, nitems, stream);
|
|
19
|
+
zsv_clock_in += clock() - clock_tmp;
|
|
20
|
+
return sz;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
size_t zsv_fwrite_clock(const void *restrict ptr, size_t size, size_t nitems, FILE *restrict stream) {
|
|
24
|
+
clock_t clock_tmp = clock();
|
|
25
|
+
size_t sz = fwrite(ptr, size, nitems, stream);
|
|
26
|
+
zsv_clock_out += clock() - clock_tmp;
|
|
27
|
+
return sz;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
int zsv_fflush_clock(FILE *stream) {
|
|
31
|
+
clock_t clock_tmp = clock();
|
|
32
|
+
int i = fflush(stream);
|
|
33
|
+
zsv_clock_out += clock() - clock_tmp;
|
|
34
|
+
return i;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
void zsv_clocks_begin(void) {
|
|
38
|
+
zsv_clock_in = zsv_clock_out = 0;
|
|
39
|
+
zsv_clock_begin = clock();
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
void zsv_clocks_end(void) {
|
|
43
|
+
clock_t clock_end = clock();
|
|
44
|
+
clock_t clock_total = clock_end - zsv_clock_begin;
|
|
45
|
+
clock_t clock_other = clock_total - zsv_clock_in - zsv_clock_out;
|
|
46
|
+
fprintf(stderr,
|
|
47
|
+
"elapsed time:\n"
|
|
48
|
+
" total %zu, %Lf\n"
|
|
49
|
+
" in %zu, %Lf\n"
|
|
50
|
+
" out %zu, %Lf\n"
|
|
51
|
+
" other %zu, %Lf\n"
|
|
52
|
+
"\n",
|
|
53
|
+
(size_t)(clock_total), (long double)(clock_total) / CLOCKS_PER_SEC, (size_t)zsv_clock_in,
|
|
54
|
+
(long double)(zsv_clock_in) / CLOCKS_PER_SEC, (size_t)zsv_clock_out,
|
|
55
|
+
(long double)(zsv_clock_out) / CLOCKS_PER_SEC, (size_t)clock_other,
|
|
56
|
+
(long double)(clock_other) / CLOCKS_PER_SEC);
|
|
57
|
+
}
|