zsv 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +36 -0
- data/LICENSE +21 -0
- data/README.md +311 -0
- data/ext/zsv/common.h +34 -0
- data/ext/zsv/extconf.rb +137 -0
- data/ext/zsv/options.c +126 -0
- data/ext/zsv/options.h +31 -0
- data/ext/zsv/options_internal.h +8 -0
- data/ext/zsv/parser.c +300 -0
- data/ext/zsv/parser.h +62 -0
- data/ext/zsv/row.c +122 -0
- data/ext/zsv/row.h +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
- data/ext/zsv/zsv_ext.c +343 -0
- data/lib/zsv/version.rb +5 -0
- data/lib/zsv.rb +81 -0
- metadata +340 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
#include <string.h>
|
|
2
|
+
#include <errno.h>
|
|
3
|
+
|
|
4
|
+
// TO DO: make this a standalone repo
|
|
5
|
+
|
|
6
|
+
// State of a dual-storage stream: data goes to an in-memory buffer first,
// and whatever does not fit is kept in a temp file
struct zsv_memfile {
  size_t size;          // capacity of the in-memory buffer, in bytes
  size_t used;          // bytes currently stored in the in-memory buffer
  char *tmp_fn;         // temp file name
  FILE *tmp_f;          // temp file handle
  size_t total_written; // bytes stored overall (in-memory + temp file)
  size_t read_offset;   // read position within the combined stream
  bool write_mode;      // cleared by rewind; writes are refused once false
  char buffer_start;    // first byte of the buffer; the rest follows in the same allocation
};
typedef struct zsv_memfile zsv_memfile;
|
|
18
|
+
|
|
19
|
+
// --- API Implementation ---
|
|
20
|
+
/**
|
|
21
|
+
* Equivalent to fopen: Allocates and initializes the stream.
|
|
22
|
+
*/
|
|
23
|
+
zsv_memfile *zsv_memfile_open(size_t buffersize) {
|
|
24
|
+
zsv_memfile *zfm = malloc(sizeof(zsv_memfile) + buffersize);
|
|
25
|
+
if (!zfm)
|
|
26
|
+
return NULL;
|
|
27
|
+
memset(zfm, 0, sizeof(zsv_memfile));
|
|
28
|
+
zfm->size = buffersize;
|
|
29
|
+
zfm->write_mode = true;
|
|
30
|
+
return zfm;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/**
 * Equivalent to fwrite: appends data to the stream. The in-memory buffer is
 * filled first; anything that does not fit goes to a temp file, which is
 * created the first time it is needed.
 *
 * @param data source bytes
 * @param sz   size of one item
 * @param n    number of items
 * @param zfm  stream; must not have been rewound yet
 * @return number of BYTES stored (not items), which may be less than sz * n
 *         if the temp file cannot be created or written. Returns 0 with
 *         errno = EPERM if zfm is NULL or no longer in write mode, and 0 with
 *         errno = EINVAL if data is NULL or sz * n overflows
 */
size_t zsv_memfile_write(const void *data, size_t sz, size_t n, zsv_memfile *zfm) {
  if (!zfm || !zfm->write_mode) {
    errno = EPERM; // Operation not permitted
    return 0;
  }

  if (sz == 0 || n == 0)
    return 0;

  if (!data || n > (size_t)-1 / sz) { // second test: sz * n would overflow
    errno = EINVAL;
    return 0;
  }

  const char *data_ptr = (const char *)data;
  size_t remaining_bytes = sz * n;
  size_t written_total = 0;

  // 1. Write to memory (until full)
  if (zfm->used < zfm->size) {
    size_t mem_space_avail = zfm->size - zfm->used;
    size_t write_to_mem = (remaining_bytes < mem_space_avail) ? remaining_bytes : mem_space_avail;

    memcpy(&zfm->buffer_start + zfm->used, data_ptr, write_to_mem);

    zfm->used += write_to_mem;
    zfm->total_written += write_to_mem;
    data_ptr += write_to_mem;
    remaining_bytes -= write_to_mem;
    written_total += write_to_mem;
  }

  if (remaining_bytes == 0)
    return written_total;

  // 2. Memory is full and data remains: make sure the overflow file exists.
  // This must not depend on step 1 having run in this call, otherwise a
  // buffer that was filled exactly by an earlier write (or a zero-sized
  // buffer) would never get a temp file and later data would be dropped
  if (!zfm->tmp_f) {
    zfm->tmp_f = zsv_tmpfile("zfm_", &zfm->tmp_fn, "wb+");
    if (!zfm->tmp_f) {
      perror("Failed to create temporary file");
      return written_total; // Return what was successfully written to memory
    }
  }

  // 3. Write to disk (overflow data)
  size_t written_to_disk = fwrite(data_ptr, 1, remaining_bytes, zfm->tmp_f);
  zfm->total_written += written_to_disk;
  written_total += written_to_disk;

  return written_total;
}
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Equivalent to freopen (simplified): Switches stream to read-only mode and resets read pointer.
|
|
84
|
+
* Assumes no further writing will occur.
|
|
85
|
+
*/
|
|
86
|
+
int zsv_memfile_rewind(zsv_memfile *zfm) {
|
|
87
|
+
if (!zfm)
|
|
88
|
+
return -1;
|
|
89
|
+
|
|
90
|
+
// Transition to read-only mode
|
|
91
|
+
zfm->write_mode = false;
|
|
92
|
+
|
|
93
|
+
// Reset the overall read pointer to the start of the combined stream
|
|
94
|
+
zfm->read_offset = 0;
|
|
95
|
+
|
|
96
|
+
// Reset the disk file pointer if it exists, essential for the read logic.
|
|
97
|
+
if (zfm->tmp_f) {
|
|
98
|
+
rewind(zfm->tmp_f);
|
|
99
|
+
}
|
|
100
|
+
return 0;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Equivalent to fread: Reads data seamlessly from memory and then disk.
|
|
105
|
+
* This function enforces the seamless abstraction layer.
|
|
106
|
+
*/
|
|
107
|
+
size_t zsv_memfile_read(void *buffer, size_t size, size_t nitems, zsv_memfile *zfm) {
|
|
108
|
+
if (!zfm || zfm->write_mode) {
|
|
109
|
+
// Must call zsv_memfile_rewind() before reading
|
|
110
|
+
errno = EPERM;
|
|
111
|
+
return 0;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
size_t nbytes = size * nitems;
|
|
115
|
+
char *buffer_ptr = (char *)buffer;
|
|
116
|
+
size_t remaining_bytes = nbytes;
|
|
117
|
+
size_t read_total = 0;
|
|
118
|
+
|
|
119
|
+
// Total available bytes to read across the entire stream
|
|
120
|
+
size_t available_bytes = zfm->total_written - zfm->read_offset;
|
|
121
|
+
if (available_bytes == 0)
|
|
122
|
+
return 0; // EOF
|
|
123
|
+
|
|
124
|
+
// Limit read request to available data
|
|
125
|
+
if (remaining_bytes > available_bytes) {
|
|
126
|
+
remaining_bytes = available_bytes;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
// 1. Read from Memory
|
|
130
|
+
if (zfm->read_offset < zfm->size) {
|
|
131
|
+
// Calculate the starting position within the memory buffer
|
|
132
|
+
size_t mem_start = zfm->read_offset;
|
|
133
|
+
|
|
134
|
+
// Calculate how much memory data is left to read
|
|
135
|
+
size_t mem_data_left = zfm->used - mem_start;
|
|
136
|
+
|
|
137
|
+
// Calculate how much to read from memory in this call
|
|
138
|
+
size_t read_from_mem = (remaining_bytes < mem_data_left) ? remaining_bytes : mem_data_left;
|
|
139
|
+
|
|
140
|
+
if (read_from_mem > 0) {
|
|
141
|
+
memcpy(buffer_ptr, &zfm->buffer_start + mem_start, read_from_mem);
|
|
142
|
+
|
|
143
|
+
zfm->read_offset += read_from_mem;
|
|
144
|
+
buffer_ptr += read_from_mem;
|
|
145
|
+
remaining_bytes -= read_from_mem;
|
|
146
|
+
read_total += read_from_mem;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// 2. Read from Disk (if necessary)
|
|
151
|
+
if (remaining_bytes > 0 && zfm->tmp_f) {
|
|
152
|
+
// Note: The disk file pointer was already managed by zsv_memfile_rewind or is positioned correctly
|
|
153
|
+
// relative to the read_offset shift from step 1.
|
|
154
|
+
size_t read_from_disk = fread(buffer_ptr, 1, remaining_bytes, zfm->tmp_f);
|
|
155
|
+
|
|
156
|
+
zfm->read_offset += read_from_disk;
|
|
157
|
+
read_total += read_from_disk;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
return read_total;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
/**
|
|
164
|
+
* Equivalent to fclose: Cleans up resources.
|
|
165
|
+
*/
|
|
166
|
+
void zsv_memfile_close(zsv_memfile *zfm) {
|
|
167
|
+
if (!zfm)
|
|
168
|
+
return;
|
|
169
|
+
|
|
170
|
+
// Close and implicitly delete the temporary file if it was opened
|
|
171
|
+
if (zfm->tmp_f) {
|
|
172
|
+
fclose(zfm->tmp_f);
|
|
173
|
+
unlink(zfm->tmp_fn);
|
|
174
|
+
}
|
|
175
|
+
free(zfm->tmp_fn);
|
|
176
|
+
zfm->tmp_fn = NULL;
|
|
177
|
+
|
|
178
|
+
// Free the main structure
|
|
179
|
+
free(zfm);
|
|
180
|
+
}
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Liquidaty and the zsv/lib contributors
|
|
3
|
+
* All rights reserved
|
|
4
|
+
*
|
|
5
|
+
* This file is part of zsv/lib, distributed under the license defined at
|
|
6
|
+
* https://opensource.org/licenses/MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#include <stdio.h>
|
|
10
|
+
#include <string.h>
|
|
11
|
+
#include <stdlib.h>
|
|
12
|
+
#include <errno.h>
|
|
13
|
+
#include <unistd.h> // for close()
|
|
14
|
+
#include <fcntl.h> // open
|
|
15
|
+
|
|
16
|
+
#include <zsv/utils/dirs.h>
|
|
17
|
+
#include <zsv/utils/os.h>
|
|
18
|
+
#include <zsv/utils/file.h>
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Get a temp file name. The returned value, if any, will have been allocated
|
|
22
|
+
* on the heap, and the caller should `free()`
|
|
23
|
+
*
|
|
24
|
+
* @param prefix string with which the resulting file name will be prefixed
|
|
25
|
+
*/
|
|
26
|
+
#if defined(_WIN32) || defined(WIN32) || defined(WIN)
|
|
27
|
+
#include <windows.h>
|
|
28
|
+
|
|
29
|
+
char *zsv_get_temp_filename(const char *prefix) {
|
|
30
|
+
TCHAR lpTempPathBuffer[MAX_PATH];
|
|
31
|
+
DWORD dwRetVal = GetTempPath(MAX_PATH, // length of the buffer
|
|
32
|
+
lpTempPathBuffer); // buffer for path
|
|
33
|
+
if (!(dwRetVal > 0 && dwRetVal < MAX_PATH))
|
|
34
|
+
zsv_perror("GetTempPath");
|
|
35
|
+
else {
|
|
36
|
+
char szTempFileName[MAX_PATH];
|
|
37
|
+
UINT uRetVal = GetTempFileName(lpTempPathBuffer, // directory for tmp files
|
|
38
|
+
TEXT(prefix), // temp file name prefix
|
|
39
|
+
0, // create unique name
|
|
40
|
+
szTempFileName); // buffer for name
|
|
41
|
+
if (uRetVal > 0)
|
|
42
|
+
return strdup(szTempFileName);
|
|
43
|
+
zsv_perror(lpTempPathBuffer);
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
return NULL;
|
|
47
|
+
}
|
|
48
|
+
#else
|
|
49
|
+
|
|
50
|
+
char *zsv_get_temp_filename(const char *prefix) {
|
|
51
|
+
char *s = NULL;
|
|
52
|
+
char *tmpdir = getenv("TMPDIR");
|
|
53
|
+
if (!tmpdir)
|
|
54
|
+
tmpdir = ".";
|
|
55
|
+
asprintf(&s, "%s/%s_XXXXXXXX", tmpdir, prefix);
|
|
56
|
+
if (!s) {
|
|
57
|
+
const char *msg = strerror(errno);
|
|
58
|
+
fprintf(stderr, "%s%c%s: %s\n", tmpdir, FILESLASH, prefix, msg ? msg : "Unknown error");
|
|
59
|
+
} else {
|
|
60
|
+
int fd = mkstemp(s);
|
|
61
|
+
if (fd > 0) {
|
|
62
|
+
close(fd);
|
|
63
|
+
return s;
|
|
64
|
+
}
|
|
65
|
+
free(s);
|
|
66
|
+
}
|
|
67
|
+
return NULL;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
#endif
|
|
71
|
+
|
|
72
|
+
/**
 * Replacement for tmpfile().
 * Unlike tmpfile(), the file is not deleted automatically: its name is
 * handed back and the file must be manually removed after fclose
 *
 * @param prefix   prefix for the generated file name
 * @param filename on success, receives the heap-allocated file name (caller
 *                 must free); left untouched on failure
 * @param mode     optional mode passed to fopen(); if NULL, defaults to "wb"
 * @return the opened stream, or NULL on failure with errno preserved
 */
FILE *zsv_tmpfile(const char *prefix, char **filename, const char *mode) {
  char *fn = zsv_get_temp_filename(prefix);
  if (fn) {
    FILE *f = fopen(fn, mode == NULL ? "wb" : mode);
    if (f) {
      *filename = fn;
      return f;
    }
  }

  int e = errno;
  if (fn) {
    // zsv_get_temp_filename() already created the file on disk; since the
    // name is not returned to the caller, delete it here so it is not orphaned
    remove(fn);
  }
  free(fn);
  errno = e;
  return NULL;
}
|
|
92
|
+
|
|
93
|
+
/**
|
|
94
|
+
* Temporarily redirect a FILE * (e.g. stdout / stderr) to a temp file
|
|
95
|
+
* temp_filename and bak are set as return values
|
|
96
|
+
* caller must free temp_filename
|
|
97
|
+
*
|
|
98
|
+
* @param old_fd file descriptor of file to dupe e.g. fileno(stdout);
|
|
99
|
+
* @return fd needed to pass on to zsv_redirect_file_from_temp
|
|
100
|
+
*/
|
|
101
|
+
#if defined(_WIN32) || defined(__FreeBSD__)
|
|
102
|
+
#include <sys/stat.h> // S_IRUSR S_IWUSR
|
|
103
|
+
#endif
|
|
104
|
+
|
|
105
|
+
// Redirects f to a freshly created temp file. On success returns a duplicate
// of f's original descriptor (to be passed to zsv_redirect_file_from_temp)
// and stores the temp file name in *temp_filename. On any failure returns -1,
// sets *temp_filename to NULL, leaves f pointing where it did before and
// preserves errno
int zsv_redirect_file_to_temp(FILE *f, const char *tempfile_prefix, char **temp_filename) {
  *temp_filename = NULL;

  int old_fd = fileno(f);
  if (old_fd < 0)
    return -1;

  fflush(f); // anything already buffered belongs to the original target

  int bak = dup(old_fd);
  if (bak < 0)
    return -1;

  char *fn = zsv_get_temp_filename(tempfile_prefix);
  int new_fd = fn ? open(fn, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR) : -1;

  if (new_fd < 0 || dup2(new_fd, old_fd) < 0) {
    int e = errno;
    if (new_fd >= 0)
      close(new_fd);
    if (fn)
      remove(fn); // the name is not returned, so do not leave the file behind
    free(fn);
    close(bak);
    errno = e;
    return -1;
  }

  // old_fd now refers to the temp file; the extra descriptor is no longer needed
  close(new_fd);
  *temp_filename = fn;
  return bak;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Undo a redirection made by zsv_redirect_file_to_temp()
 *
 * @param f      the redirected stream
 * @param bak    descriptor returned by zsv_redirect_file_to_temp(); it is closed here
 * @param old_fd descriptor of f that was redirected
 */
void zsv_redirect_file_from_temp(FILE *f, int bak, int old_fd) {
  // Flush first, while old_fd still refers to the redirect target
  fflush(f);

  // Point old_fd back at the saved descriptor, then drop the saved copy
  dup2(bak, old_fd);
  close(bak);
}
|
|
127
|
+
|
|
128
|
+
#if defined(_WIN32) || defined(WIN32) || defined(WIN)
|
|
129
|
+
int zsv_file_exists(const char *filename) {
|
|
130
|
+
DWORD attributes = GetFileAttributes(filename);
|
|
131
|
+
return (attributes != INVALID_FILE_ATTRIBUTES && !(attributes & FILE_ATTRIBUTE_DIRECTORY));
|
|
132
|
+
}
|
|
133
|
+
#else
|
|
134
|
+
#include <sys/stat.h> // S_IRUSR S_IWUSR
|
|
135
|
+
|
|
136
|
+
// POSIX implementation: returns 1 if filename can be stat()ed and is not a
// directory, otherwise 0
int zsv_file_exists(const char *filename) {
  struct stat buffer;
  if (stat(filename, &buffer) != 0)
    return 0;

  // S_ISDIR compares the whole file-type field. A plain `st_mode & S_IFDIR`
  // bit test also matches other types whose code shares that bit (block
  // devices, sockets) and would report them as directories
  return S_ISDIR(buffer.st_mode) ? 0 : 1;
}
|
|
145
|
+
#endif
|
|
146
|
+
|
|
147
|
+
/**
 * Copy a file, given source and destination paths
 * On error, output error message and return non-zero
 *
 * @return 0 on success; otherwise an errno value, or -1 when errno was not
 *         set or the destination directories could not be created
 */
int zsv_copy_file(const char *src, const char *dest) {
  // create one or more directories if needed
  if (zsv_mkdirs(dest, 1)) {
    fprintf(stderr, "Unable to create directories needed for %s\n", dest);
    return -1;
  }

  FILE *fsrc = zsv_fopen(src, "rb");
  if (!fsrc) {
    int open_err = errno ? errno : -1;
    perror(src);
    return open_err;
  }

  int err = 0;
  FILE *fdest = zsv_fopen(dest, "wb");
  if (!fdest) {
    err = errno ? errno : -1;
    perror(dest);
  } else {
    err = zsv_copy_file_ptr(fsrc, fdest);
    if (err)
      perror(dest);

    // fclose() flushes buffered output, so a write failure (e.g. disk full)
    // may only surface here; do not report success in that case
    if (fclose(fdest) != 0 && !err) {
      err = errno ? errno : -1;
      perror(dest);
    }
  }

  fclose(fsrc);
  return err;
}
|
|
177
|
+
|
|
178
|
+
/**
 * Copy from one file-like handle to another using caller-supplied
 * fread-style and fwrite-style functions
 *
 * Copying stops when the read function returns 0.
 *
 * @return 0 on success; if a write is short, errno (or -1 if errno is 0)
 */
int zsv_copy_filelike_ptr(
  FILE *src, size_t (*freadx)(void *restrict ptr, size_t size, size_t nitems, void *restrict stream), FILE *dest,
  size_t (*fwritex)(const void *restrict ptr, size_t size, size_t nitems, void *restrict stream)) {
  char chunk[4096 * 16];

  for (;;) {
    size_t got = freadx(chunk, 1, sizeof(chunk), src);
    if (got == 0)
      return 0; // nothing more to read

    // Anything other than a complete write is treated as an error
    if (fwritex(chunk, 1, got, dest) != got)
      return errno ? errno : -1;
  }
}
|
|
197
|
+
|
|
198
|
+
/**
 * Copy the remaining contents of src to dest, both plain FILE pointers
 * Return error number per errno.h
 */
int zsv_copy_file_ptr(FILE *src, FILE *dest) {
  // fread / fwrite are adapted to the void *-stream callback signatures
  // that zsv_copy_filelike_ptr() expects
  typedef size_t (*read_fn)(void *restrict ptr, size_t size, size_t nitems, void *restrict stream);
  typedef size_t (*write_fn)(const void *restrict ptr, size_t size, size_t nitems, void *restrict stream);

  read_fn reader = (read_fn)fread;
  write_fn writer = (write_fn)fwrite;
  return zsv_copy_filelike_ptr(src, reader, dest, writer);
}
|
|
207
|
+
|
|
208
|
+
// Splits filepath at its last '/' or '\\'.
// *basename is set to the character after that separator, and the return
// value is the length of the directory part, not counting the separator.
// If there is no separator, *basename is filepath itself and 0 is returned
size_t zsv_dir_len_basename(const char *filepath, const char **basename) {
  const char *end = filepath + strlen(filepath);

  // Walk backwards; `end` always points just past the character being tested
  while (end > filepath) {
    char c = end[-1];
    if (c == '/' || c == '\\') {
      *basename = end;
      return (size_t)(end - filepath) - 1;
    }
    end--;
  }

  *basename = filepath;
  return 0;
}
|
|
219
|
+
|
|
220
|
+
// Checks whether filename can be opened for reading.
//
// err:   optional; set to 0 on success or to errno if the open fails. When
//        err is NULL, an open failure is reported with perror() instead
// f_out: optional; if non-NULL, receives the opened stream (caller closes
//        it). When f_out is NULL the stream is closed before returning
//
// Returns 1 if the file could be opened, 0 otherwise
int zsv_file_readable(const char *filename, int *err, FILE **f_out) {
  if (err)
    *err = 0;

  // to do: use fstat()
  FILE *f = zsv_fopen(filename, "rb");
  if (f == NULL) {
    if (err)
      *err = errno;
    else
      perror(filename);
    return 0;
  }

  if (f_out)
    *f_out = f;
  else
    fclose(f);
  return 1;
}
|
|
241
|
+
|
|
242
|
+
/**
 * fwrite()-like function with a signature that can be used as a callback
 * argument to `zsv_set_scan_filter()`
 *
 * Writes bytes_read bytes of buff to the FILE * passed as FILEp. The result
 * of fwrite() is not checked: bytes_read is always returned.
 */
size_t zsv_filter_write(void *FILEp, unsigned char *buff, size_t bytes_read) {
  FILE *out = FILEp;
  fwrite(buff, 1, bytes_read, out);
  return bytes_read;
}
|
|
250
|
+
|
|
251
|
+
// printf-shaped no-op: ignores the context, the format and every variadic
// argument, and always returns 0
int zsv_no_printf(void *_ctx, const char *_format, ...) {
  (void)_ctx;
  (void)_format;
  return 0;
}
|
|
255
|
+
|
|
256
|
+
#include "file-mem.c"
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
#include <assert.h>
|
|
2
|
+
#include <stdlib.h>
|
|
3
|
+
#include <string.h>
|
|
4
|
+
#include <unistd.h>
|
|
5
|
+
#include <zsv.h>
|
|
6
|
+
#include <zsv/utils/prop.h>
|
|
7
|
+
#include "index.h"
|
|
8
|
+
|
|
9
|
+
struct zsv_index *zsv_index_new(void) {
|
|
10
|
+
struct zsv_index *ix = calloc(1, sizeof(*ix));
|
|
11
|
+
|
|
12
|
+
if (!ix)
|
|
13
|
+
return ix;
|
|
14
|
+
|
|
15
|
+
const size_t init_cap = 512;
|
|
16
|
+
ix->first = calloc(1, sizeof(*ix->first) + init_cap * sizeof(ix->first->u64s[0]));
|
|
17
|
+
ix->first->capacity = init_cap;
|
|
18
|
+
|
|
19
|
+
return ix;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
void zsv_index_delete(struct zsv_index *ix) {
|
|
23
|
+
if (ix) {
|
|
24
|
+
struct zsv_index_array *arr = ix->first;
|
|
25
|
+
|
|
26
|
+
while (arr) {
|
|
27
|
+
struct zsv_index_array *a = arr;
|
|
28
|
+
arr = arr->next;
|
|
29
|
+
free(a);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
free(ix);
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
enum zsv_index_status zsv_index_add_row(struct zsv_index *ix, uint64_t line_end) {
|
|
37
|
+
struct zsv_index_array *arr = ix->first;
|
|
38
|
+
size_t len = arr->len, cap = arr->capacity;
|
|
39
|
+
|
|
40
|
+
if (!ix->header_line_end) {
|
|
41
|
+
ix->header_line_end = line_end;
|
|
42
|
+
return zsv_index_status_ok;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
ix->row_count_local++;
|
|
46
|
+
|
|
47
|
+
if ((ix->row_count_local & (ZSV_INDEX_ROW_N - 1)) != 0)
|
|
48
|
+
return zsv_index_status_ok;
|
|
49
|
+
|
|
50
|
+
while (len >= cap) {
|
|
51
|
+
assert(len == cap);
|
|
52
|
+
|
|
53
|
+
if (!arr->next) {
|
|
54
|
+
len = 0;
|
|
55
|
+
cap *= 2;
|
|
56
|
+
arr->next = calloc(1, sizeof(*arr) + cap * sizeof(arr->u64s[0]));
|
|
57
|
+
arr = arr->next;
|
|
58
|
+
if (!arr)
|
|
59
|
+
return zsv_index_status_memory;
|
|
60
|
+
arr->capacity = cap;
|
|
61
|
+
} else {
|
|
62
|
+
arr = arr->next;
|
|
63
|
+
len = arr->len;
|
|
64
|
+
cap = arr->capacity;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
arr->u64s[len] = line_end;
|
|
69
|
+
arr->len++;
|
|
70
|
+
|
|
71
|
+
return zsv_index_status_ok;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
void zsv_index_commit_rows(struct zsv_index *ix) {
|
|
75
|
+
ix->row_count = ix->row_count_local;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
enum zsv_index_status zsv_index_row_end_offset(const struct zsv_index *ix, uint64_t row, uint64_t *offset_out,
|
|
79
|
+
uint64_t *remaining_rows_out) {
|
|
80
|
+
assert(ix->row_count <= ix->row_count_local);
|
|
81
|
+
|
|
82
|
+
if (row > ix->row_count)
|
|
83
|
+
return zsv_index_status_error;
|
|
84
|
+
|
|
85
|
+
if (row < ZSV_INDEX_ROW_N) {
|
|
86
|
+
*offset_out = ix->header_line_end;
|
|
87
|
+
*remaining_rows_out = row;
|
|
88
|
+
|
|
89
|
+
return zsv_index_status_ok;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
const size_t i = (row >> ZSV_INDEX_ROW_SHIFT) - 1;
|
|
93
|
+
struct zsv_index_array *arr = ix->first;
|
|
94
|
+
size_t lens = 0;
|
|
95
|
+
|
|
96
|
+
while (i >= lens + arr->len) {
|
|
97
|
+
assert(arr->next);
|
|
98
|
+
|
|
99
|
+
lens += arr->len;
|
|
100
|
+
arr = arr->next;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
*offset_out = (long)arr->u64s[i - lens];
|
|
104
|
+
*remaining_rows_out = row & (ZSV_INDEX_ROW_N - 1);
|
|
105
|
+
|
|
106
|
+
return zsv_index_status_ok;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
struct seek_row_ctx {
|
|
110
|
+
uint64_t remaining_rows;
|
|
111
|
+
zsv_parser parser;
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
static void seek_row_handler(void *ctx) {
|
|
115
|
+
struct seek_row_ctx *c = ctx;
|
|
116
|
+
|
|
117
|
+
c->remaining_rows--;
|
|
118
|
+
if (c->remaining_rows > 0)
|
|
119
|
+
return;
|
|
120
|
+
|
|
121
|
+
zsv_abort(c->parser);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
static enum zsv_index_status seek_and_check_newline(long *offset, struct zsv_opts *opts) {
|
|
125
|
+
char new_line[2];
|
|
126
|
+
zsv_generic_read read = (zsv_generic_read)fread;
|
|
127
|
+
zsv_generic_seek seek = (zsv_generic_seek)fseek;
|
|
128
|
+
FILE *stream = opts->stream;
|
|
129
|
+
|
|
130
|
+
if (opts->seek)
|
|
131
|
+
seek = opts->seek;
|
|
132
|
+
|
|
133
|
+
if (opts->read)
|
|
134
|
+
read = opts->read;
|
|
135
|
+
|
|
136
|
+
if (seek(stream, *offset, SEEK_SET))
|
|
137
|
+
return zsv_index_status_error;
|
|
138
|
+
|
|
139
|
+
size_t nmemb = read(new_line, 1, 2, stream);
|
|
140
|
+
|
|
141
|
+
if (nmemb < 1)
|
|
142
|
+
return zsv_index_status_error;
|
|
143
|
+
|
|
144
|
+
if (new_line[0] == '\n') {
|
|
145
|
+
*offset += 1;
|
|
146
|
+
} else if (new_line[0] == '\r') {
|
|
147
|
+
*offset += 1;
|
|
148
|
+
|
|
149
|
+
if (new_line[1] == '\n')
|
|
150
|
+
*offset += 1;
|
|
151
|
+
} else {
|
|
152
|
+
return zsv_index_status_error;
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
if (seek(stream, *offset, SEEK_SET))
|
|
156
|
+
return zsv_index_status_error;
|
|
157
|
+
|
|
158
|
+
return zsv_index_status_ok;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
enum zsv_index_status zsv_index_seek_row(const struct zsv_index *ix, struct zsv_opts *opts, uint64_t row) {
|
|
162
|
+
uint64_t offset;
|
|
163
|
+
uint64_t remaining_rows;
|
|
164
|
+
enum zsv_index_status zist = zsv_index_row_end_offset(ix, row, &offset, &remaining_rows);
|
|
165
|
+
|
|
166
|
+
if (zist != zsv_index_status_ok)
|
|
167
|
+
return zist;
|
|
168
|
+
|
|
169
|
+
if ((zist = seek_and_check_newline((long *)&offset, opts)) != zsv_index_status_ok)
|
|
170
|
+
return zist;
|
|
171
|
+
|
|
172
|
+
if (!remaining_rows)
|
|
173
|
+
return zsv_index_status_ok;
|
|
174
|
+
|
|
175
|
+
struct seek_row_ctx ctx = {
|
|
176
|
+
.remaining_rows = remaining_rows,
|
|
177
|
+
};
|
|
178
|
+
struct zsv_opts o;
|
|
179
|
+
memcpy(&o, opts, sizeof(o));
|
|
180
|
+
o.ctx = &ctx;
|
|
181
|
+
o.row_handler = seek_row_handler;
|
|
182
|
+
zsv_parser parser = zsv_new(&o);
|
|
183
|
+
ctx.parser = parser;
|
|
184
|
+
|
|
185
|
+
enum zsv_status zst;
|
|
186
|
+
while ((zst = zsv_parse_more(parser)) == zsv_status_ok)
|
|
187
|
+
;
|
|
188
|
+
|
|
189
|
+
if (zst != zsv_status_cancelled)
|
|
190
|
+
return zsv_index_status_error;
|
|
191
|
+
|
|
192
|
+
offset += zsv_cum_scanned_length(parser);
|
|
193
|
+
|
|
194
|
+
zsv_delete(parser);
|
|
195
|
+
|
|
196
|
+
return seek_and_check_newline((long *)&offset, opts);
|
|
197
|
+
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
#ifndef ZSV_UTILS_INDEX_H
|
|
2
|
+
#define ZSV_UTILS_INDEX_H
|
|
3
|
+
|
|
4
|
+
#include <stdint.h>
|
|
5
|
+
#include <stddef.h>
|
|
6
|
+
|
|
7
|
+
#include "zsv/common.h"
|
|
8
|
+
|
|
9
|
+
// Sampling interval of the index, expressed as a power of two: only every
// ZSV_INDEX_ROW_N-th line end is stored.
// With a shift of 10, ZSV_INDEX_ROW_N is 1 << 10 = 1024.
#define ZSV_INDEX_ROW_SHIFT 10
#define ZSV_INDEX_ROW_N (1 << ZSV_INDEX_ROW_SHIFT)
|
|
13
|
+
|
|
14
|
+
// Result codes returned by the zsv_index_* functions.
enum zsv_index_status {
  // the operation succeeded
  zsv_index_status_ok = 0,
  // a memory allocation failed
  zsv_index_status_memory = 1,
  // any other failure, e.g. a row outside the index or a failed seek/read
  zsv_index_status_error = 2,
  zsv_index_status_utf8 = 3,
};
|
|
20
|
+
|
|
21
|
+
// One chunk of uint64_t values, allocated together with its storage.
// Growing a chunk in place would require reallocation; that is avoided by
// linking additional chunks through `next` instead.
struct zsv_index_array {
  // number of slots available in u64s
  size_t capacity;
  // number of slots currently in use
  size_t len;
  // following chunk, or NULL if this is the last one
  struct zsv_index_array *next;
  // the values themselves (flexible array member)
  uint64_t u64s[];
};
|
|
29
|
+
|
|
30
|
+
// Sparse index of line-end offsets.
struct zsv_index {
  // offset of the end of the header line; 0 until the first line is added
  uint64_t header_line_end;

  // Committed row count.
  // Reading and writing should be protected with a lock
  uint64_t row_count;

  // Running row count.
  // Should only be updated by the thread building the index
  uint64_t row_count_local;

  // first chunk of the list holding the offset of every ZSV_INDEX_ROW_N-th
  // line end
  struct zsv_index_array *first;
};
|
|
40
|
+
|
|
41
|
+
/**
 * Create an empty index. Returns NULL if the index cannot be allocated.
 * Release the result with zsv_index_delete().
 */
struct zsv_index *zsv_index_new(void);

/** Free an index and all of its offset arrays. NULL is accepted. */
void zsv_index_delete(struct zsv_index *ix);

/**
 * Register the end offset of the next line. The first call records the
 * header line end; afterwards every ZSV_INDEX_ROW_N-th line end is stored.
 * Returns zsv_index_status_memory if the index cannot grow.
 */
enum zsv_index_status zsv_index_add_row(struct zsv_index *ix, uint64_t line_end);

/** Copy the running row count (row_count_local) into row_count. */
void zsv_index_commit_rows(struct zsv_index *ix);

/**
 * Find the stored line-end offset closest to (at or before) `row`, and the
 * number of rows that remain to be skipped from there. Returns
 * zsv_index_status_error if `row` exceeds the committed row count.
 */
enum zsv_index_status zsv_index_row_end_offset(const struct zsv_index *ix, uint64_t row, uint64_t *offset_out,
                                               uint64_t *remaining_rows_out);

/**
 * Position the stream of `opts` just after the line located via the index
 * for `row`, parsing past any rows the index does not cover directly.
 */
enum zsv_index_status zsv_index_seek_row(const struct zsv_index *ix, struct zsv_opts *opts, uint64_t row);
|
|
48
|
+
|
|
49
|
+
#endif
|