zsv 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +36 -0
- data/LICENSE +21 -0
- data/README.md +311 -0
- data/ext/zsv/common.h +34 -0
- data/ext/zsv/extconf.rb +137 -0
- data/ext/zsv/options.c +126 -0
- data/ext/zsv/options.h +31 -0
- data/ext/zsv/options_internal.h +8 -0
- data/ext/zsv/parser.c +300 -0
- data/ext/zsv/parser.h +62 -0
- data/ext/zsv/row.c +122 -0
- data/ext/zsv/row.h +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
- data/ext/zsv/zsv_ext.c +343 -0
- data/lib/zsv/version.rb +5 -0
- data/lib/zsv.rb +81 -0
- metadata +340 -0
|
@@ -0,0 +1,913 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2023 Liquidaty and the zsv/lib contributors
|
|
3
|
+
* All rights reserved
|
|
4
|
+
*
|
|
5
|
+
* This file is part of zsv/lib, distributed under the license defined at
|
|
6
|
+
* https://opensource.org/licenses/MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#include <stdio.h>
|
|
10
|
+
#include <string.h>
|
|
11
|
+
#include <stdlib.h>
|
|
12
|
+
#include <math.h>
|
|
13
|
+
#include <limits.h>
|
|
14
|
+
|
|
15
|
+
#include <jsonwriter.h>
|
|
16
|
+
|
|
17
|
+
#include <sqlite3.h>
|
|
18
|
+
extern sqlite3_module CsvModule;
|
|
19
|
+
|
|
20
|
+
#include <zsv/utils/string.h>
|
|
21
|
+
#include <zsv/utils/writer.h>
|
|
22
|
+
|
|
23
|
+
#define ZSV_COMMAND compare
|
|
24
|
+
#include "zsv_command.h"
|
|
25
|
+
|
|
26
|
+
#include "compare.h"
|
|
27
|
+
#include "compare_internal.h"
|
|
28
|
+
|
|
29
|
+
#include "compare_unique_colname.c"
|
|
30
|
+
#include "compare_added_column.c"
|
|
31
|
+
#include "compare_sort.c"
|
|
32
|
+
|
|
33
|
+
#define ZSV_COMPARE_OUTPUT_TYPE_JSON 'j'
|
|
34
|
+
|
|
35
|
+
static struct zsv_compare_key **zsv_compare_key_add(struct zsv_compare_key **next, const char *s, int *err) {
|
|
36
|
+
struct zsv_compare_key *k = calloc(1, sizeof(*k));
|
|
37
|
+
if (!k)
|
|
38
|
+
*err = 1;
|
|
39
|
+
else {
|
|
40
|
+
k->name = s;
|
|
41
|
+
*next = k;
|
|
42
|
+
next = &k->next;
|
|
43
|
+
}
|
|
44
|
+
return next;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
static void zsv_compare_output_property_name(struct zsv_compare_data *data, int new_row, char skip) {
|
|
48
|
+
if (new_row)
|
|
49
|
+
data->writer.cell_ix = 0;
|
|
50
|
+
else
|
|
51
|
+
data->writer.cell_ix++;
|
|
52
|
+
if (!skip) {
|
|
53
|
+
if (data->writer.cell_ix < data->writer.properties.used)
|
|
54
|
+
jsonwriter_object_key(data->writer.handle.jsw, data->writer.properties.names[data->writer.cell_ix]);
|
|
55
|
+
else
|
|
56
|
+
jsonwriter_object_key(data->writer.handle.jsw, "Error missing key!");
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
static void zsv_compare_output_strn(struct zsv_compare_data *data, const unsigned char *s, size_t len, int new_row,
|
|
61
|
+
int quoted) {
|
|
62
|
+
if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) {
|
|
63
|
+
if (data->writer.object && s == NULL) {
|
|
64
|
+
zsv_compare_output_property_name(data, new_row, 1);
|
|
65
|
+
return;
|
|
66
|
+
}
|
|
67
|
+
if (data->writer.object)
|
|
68
|
+
zsv_compare_output_property_name(data, new_row, 0);
|
|
69
|
+
if (s == NULL)
|
|
70
|
+
jsonwriter_null(data->writer.handle.jsw);
|
|
71
|
+
else
|
|
72
|
+
jsonwriter_strn(data->writer.handle.jsw, s, len);
|
|
73
|
+
} else {
|
|
74
|
+
if (s == NULL)
|
|
75
|
+
zsv_writer_cell_blank(data->writer.handle.csv, ZSV_WRITER_SAME_ROW);
|
|
76
|
+
else
|
|
77
|
+
zsv_writer_cell(data->writer.handle.csv, new_row, s, len, quoted);
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/*
 * Write a NUL-terminated string as one cell; a NULL `s` is forwarded with
 * length 0. See zsv_compare_output_strn() for how each writer handles it.
 */
static void zsv_compare_output_str(struct zsv_compare_data *data, const unsigned char *s, int new_row, int quoted) {
  size_t length = 0;
  if (s)
    length = strlen((const char *)s);
  zsv_compare_output_strn(data, s, length, new_row, quoted);
}
|
|
84
|
+
|
|
85
|
+
static void zsv_compare_output_zu(struct zsv_compare_data *data, size_t n, int new_row) {
|
|
86
|
+
if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) {
|
|
87
|
+
if (data->writer.object)
|
|
88
|
+
zsv_compare_output_property_name(data, new_row, 0);
|
|
89
|
+
jsonwriter_int(data->writer.handle.jsw, n);
|
|
90
|
+
} else
|
|
91
|
+
zsv_writer_cell_zu(data->writer.handle.csv, ZSV_WRITER_NEW_ROW, data->row_count);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
static void zsv_compare_header_str(struct zsv_compare_data *data, const unsigned char *s, int new_row, int quoted) {
|
|
95
|
+
if (!(data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON && data->writer.object))
|
|
96
|
+
zsv_compare_output_str(data, s, new_row, quoted);
|
|
97
|
+
else {
|
|
98
|
+
// we will output as JSON objects, so save the property names for later use
|
|
99
|
+
if (data->writer.properties.used + 1 < data->writer.properties.allocated)
|
|
100
|
+
data->writer.properties.names[data->writer.properties.used++] = strdup(s ? (const char *)s : "");
|
|
101
|
+
else
|
|
102
|
+
fprintf(stderr, "zsv_compare_header_str: insufficient header names allocation adding %s!\n", s);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
static void zsv_compare_allocate_properties(struct zsv_compare_data *data, unsigned count) {
|
|
107
|
+
if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON && data->writer.object && count > 0) {
|
|
108
|
+
if ((data->writer.properties.names = malloc(count * sizeof(*data->writer.properties.names))))
|
|
109
|
+
data->writer.properties.allocated = count;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
static void zsv_compare_json_row_start(struct zsv_compare_data *data) {
|
|
114
|
+
if (data->writer.object)
|
|
115
|
+
jsonwriter_start_object(data->writer.handle.jsw);
|
|
116
|
+
else
|
|
117
|
+
jsonwriter_start_array(data->writer.handle.jsw);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
static void zsv_compare_json_row_end(struct zsv_compare_data *data) {
|
|
121
|
+
if (data->writer.object)
|
|
122
|
+
jsonwriter_end_object(data->writer.handle.jsw);
|
|
123
|
+
else
|
|
124
|
+
jsonwriter_end_array(data->writer.handle.jsw);
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
static void zsv_compare_output_tuple(struct zsv_compare_data *data, struct zsv_compare_input *key_input,
|
|
128
|
+
const unsigned char *colname,
|
|
129
|
+
struct zsv_cell *values, // in original input order
|
|
130
|
+
char is_key) {
|
|
131
|
+
// print ID | Column | Value 1 | ... | Value N
|
|
132
|
+
if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON)
|
|
133
|
+
zsv_compare_json_row_start(data);
|
|
134
|
+
|
|
135
|
+
// TO DO: output ID values
|
|
136
|
+
if (!data->keys) // id is effectively just row number
|
|
137
|
+
zsv_compare_output_zu(data, data->row_count, ZSV_WRITER_NEW_ROW);
|
|
138
|
+
else {
|
|
139
|
+
for (unsigned idx = 0; idx < key_input->key_count; idx++) {
|
|
140
|
+
struct zsv_cell *c = &key_input->keys[idx].value;
|
|
141
|
+
zsv_compare_output_strn(data, c->str, c->len, idx == 0 ? ZSV_WRITER_NEW_ROW : ZSV_WRITER_SAME_ROW, c->quoted);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// output additional columns
|
|
146
|
+
for (struct zsv_compare_added_column *ac = data->added_columns; ac; ac = ac->next) {
|
|
147
|
+
if (!ac->input) {
|
|
148
|
+
if (data->writer.type != ZSV_COMPARE_OUTPUT_TYPE_JSON)
|
|
149
|
+
zsv_compare_output_str(data, NULL, ZSV_WRITER_SAME_ROW, 0);
|
|
150
|
+
} else {
|
|
151
|
+
struct zsv_cell c = data->get_cell(ac->input, ac->col_ix);
|
|
152
|
+
zsv_compare_output_strn(data, c.str, c.len, ZSV_WRITER_SAME_ROW, c.quoted);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// output column name of this cell
|
|
157
|
+
zsv_compare_output_str(data, colname, ZSV_WRITER_SAME_ROW, 1);
|
|
158
|
+
|
|
159
|
+
for (unsigned i = 0; i < data->input_count; i++) {
|
|
160
|
+
struct zsv_compare_input *input = &data->inputs[i];
|
|
161
|
+
if ((input->done || !input->row_loaded) && !is_key) { // no data for this input
|
|
162
|
+
zsv_compare_output_str(data, NULL, ZSV_WRITER_SAME_ROW, 0);
|
|
163
|
+
} else {
|
|
164
|
+
struct zsv_cell *value = &values[i];
|
|
165
|
+
zsv_compare_output_strn(data, value->str, value->len, ZSV_WRITER_SAME_ROW, value->quoted);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON)
|
|
170
|
+
zsv_compare_json_row_end(data);
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
static const unsigned char *zsv_compare_combined_key_names(struct zsv_compare_data *data) {
|
|
174
|
+
if (!data->combined_key_names) {
|
|
175
|
+
size_t len = 2;
|
|
176
|
+
|
|
177
|
+
for (unsigned key_ix = 0; key_ix < data->key_count; key_ix++) {
|
|
178
|
+
struct zsv_compare_key *key = &data->keys[key_ix];
|
|
179
|
+
if (key && key->name)
|
|
180
|
+
len += strlen(key->name) + 1;
|
|
181
|
+
}
|
|
182
|
+
if ((data->combined_key_names = calloc(1, len))) {
|
|
183
|
+
unsigned char *start = NULL;
|
|
184
|
+
for (unsigned key_ix = 0; key_ix < data->key_count; key_ix++) {
|
|
185
|
+
struct zsv_compare_key *key = &data->keys[key_ix];
|
|
186
|
+
if (key && key->name) {
|
|
187
|
+
if (start) {
|
|
188
|
+
*start = (unsigned char)'|';
|
|
189
|
+
start++;
|
|
190
|
+
} else
|
|
191
|
+
start = data->combined_key_names;
|
|
192
|
+
strcpy((char *)start, key->name);
|
|
193
|
+
start += strlen((char *)start);
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
return data->combined_key_names;
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
static void zsv_compare_print_row(struct zsv_compare_data *data,
|
|
202
|
+
const unsigned last_ix // last input ix in inputs_to_sort
|
|
203
|
+
) {
|
|
204
|
+
struct zsv_compare_input *key_input = data->inputs_to_sort[0];
|
|
205
|
+
|
|
206
|
+
// for now, output format is simple: for each value,
|
|
207
|
+
// output a single scalar if the values are the same,
|
|
208
|
+
// and a tuple if they differ
|
|
209
|
+
struct zsv_cell *values = calloc(data->input_count, sizeof(*values));
|
|
210
|
+
if (!values) {
|
|
211
|
+
data->status = zsv_compare_status_memory;
|
|
212
|
+
return;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
#define ZSV_COMPARE_MISSING "Missing"
|
|
216
|
+
|
|
217
|
+
// if we don't have data from every input, then output "Missing" for missing inputs
|
|
218
|
+
char got_missing = 0;
|
|
219
|
+
for (unsigned i = 0; i < data->input_count; i++) {
|
|
220
|
+
struct zsv_compare_input *input = data->inputs_to_sort[i];
|
|
221
|
+
if (i > last_ix) {
|
|
222
|
+
got_missing = 1;
|
|
223
|
+
unsigned input_ix = input->index;
|
|
224
|
+
values[input_ix].str = (unsigned char *)ZSV_COMPARE_MISSING;
|
|
225
|
+
values[input_ix].len = strlen(ZSV_COMPARE_MISSING);
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
if (got_missing) {
|
|
229
|
+
const unsigned char *key_names =
|
|
230
|
+
data->print_key_col_names ? zsv_compare_combined_key_names(data) : (const unsigned char *)"<key>";
|
|
231
|
+
zsv_compare_output_tuple(data, key_input, key_names, values, 1);
|
|
232
|
+
// reset values
|
|
233
|
+
memset(values, 0, data->input_count * sizeof(*values));
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// for each output column
|
|
237
|
+
zsv_compare_unique_colname *output_col = data->output_colnames_first;
|
|
238
|
+
for (unsigned output_ix = 0; output_ix < data->output_colcount && output_col != NULL;
|
|
239
|
+
output_ix++, output_col = output_col->next) {
|
|
240
|
+
if (output_col->is_key)
|
|
241
|
+
continue;
|
|
242
|
+
|
|
243
|
+
char different = 0;
|
|
244
|
+
unsigned first_input_ix = 0;
|
|
245
|
+
for (unsigned i = 0; i <= last_ix; i++) {
|
|
246
|
+
struct zsv_compare_input *input = data->inputs_to_sort[i];
|
|
247
|
+
if (input->done || !input->row_loaded)
|
|
248
|
+
continue;
|
|
249
|
+
|
|
250
|
+
unsigned input_ix = input->index;
|
|
251
|
+
if (i == 0)
|
|
252
|
+
first_input_ix = input_ix;
|
|
253
|
+
|
|
254
|
+
unsigned col_ix_plus_1 = input->out2in[output_ix];
|
|
255
|
+
if (col_ix_plus_1 == 0)
|
|
256
|
+
values[input_ix].len = 0;
|
|
257
|
+
else {
|
|
258
|
+
unsigned input_col_ix = col_ix_plus_1 - 1;
|
|
259
|
+
if (!output_col)
|
|
260
|
+
output_col = input->output_colnames[input_col_ix];
|
|
261
|
+
values[input_ix] = data->get_cell(input, input_col_ix);
|
|
262
|
+
if (i > 0 && !different &&
|
|
263
|
+
data->cmp(data->cmp_ctx, values[first_input_ix], values[input_ix], data, input_col_ix)) {
|
|
264
|
+
different = 1;
|
|
265
|
+
if (data->tolerance.value && values[first_input_ix].len < ZSV_COMPARE_MAX_NUMBER_BUFF_LEN &&
|
|
266
|
+
values[input_ix].len < ZSV_COMPARE_MAX_NUMBER_BUFF_LEN) {
|
|
267
|
+
// check if both are numbers with a difference less than the given tolerance
|
|
268
|
+
double d1, d2;
|
|
269
|
+
memcpy(data->tolerance.str1, values[first_input_ix].str, values[first_input_ix].len);
|
|
270
|
+
data->tolerance.str1[values[first_input_ix].len] = '\0';
|
|
271
|
+
memcpy(data->tolerance.str2, values[input_ix].str, values[input_ix].len);
|
|
272
|
+
data->tolerance.str2[values[input_ix].len] = '\0';
|
|
273
|
+
if (!zsv_strtod_exact(data->tolerance.str1, &d1) && !zsv_strtod_exact(data->tolerance.str2, &d2) &&
|
|
274
|
+
fabs(d1 - d2) < data->tolerance.value)
|
|
275
|
+
different = 0;
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
if (different) {
|
|
282
|
+
zsv_compare_output_tuple(data, key_input, output_col->name, values, 0);
|
|
283
|
+
if (data->diff_count < INT_MAX)
|
|
284
|
+
data->diff_count++;
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
free(values);
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
static void zsv_compare_input_free(struct zsv_compare_input *input) {
|
|
291
|
+
zsv_delete(input->parser);
|
|
292
|
+
zsv_compare_unique_colnames_delete(&input->colnames);
|
|
293
|
+
if (input->added)
|
|
294
|
+
sqlite3_zsv_list_remove(input->path);
|
|
295
|
+
free(input->out2in);
|
|
296
|
+
if (input->stream)
|
|
297
|
+
fclose(input->stream);
|
|
298
|
+
free(input->output_colnames);
|
|
299
|
+
free(input->keys);
|
|
300
|
+
if (input->sort_stmt) {
|
|
301
|
+
sqlite3_finalize(input->sort_stmt);
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
static enum zsv_compare_status zsv_compare_set_inputs(struct zsv_compare_data *data, unsigned input_count) {
|
|
306
|
+
if (!input_count || !(data->inputs = calloc(input_count, sizeof(*data->inputs))) ||
|
|
307
|
+
!(data->inputs_to_sort = calloc(input_count, sizeof(*data->inputs_to_sort))))
|
|
308
|
+
return zsv_compare_status_memory;
|
|
309
|
+
data->input_count = input_count;
|
|
310
|
+
for (unsigned i = 0; i < input_count; i++) {
|
|
311
|
+
struct zsv_compare_input *input = &data->inputs[i];
|
|
312
|
+
input->index = i;
|
|
313
|
+
data->inputs_to_sort[i] = input;
|
|
314
|
+
if (data->key_count) {
|
|
315
|
+
if (!(input->keys = calloc(data->key_count, sizeof(*input->keys))))
|
|
316
|
+
return zsv_compare_status_memory;
|
|
317
|
+
|
|
318
|
+
input->key_count = data->key_count;
|
|
319
|
+
unsigned j = 0;
|
|
320
|
+
for (struct zsv_compare_key *key = data->keys; key; key = key->next)
|
|
321
|
+
input->keys[j++].key = key;
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
return zsv_compare_status_ok;
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2, void *data, unsigned col_ix);
|
|
328
|
+
|
|
329
|
+
static void zsv_compare_output_begin(struct zsv_compare_data *data) {
|
|
330
|
+
if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) {
|
|
331
|
+
if (!(data->writer.handle.jsw = jsonwriter_new(stdout))) // to do: data->out
|
|
332
|
+
data->status = zsv_compare_status_memory;
|
|
333
|
+
else {
|
|
334
|
+
if (data->writer.compact)
|
|
335
|
+
jsonwriter_set_option(data->writer.handle.jsw, jsonwriter_option_compact);
|
|
336
|
+
jsonwriter_start_array(data->writer.handle.jsw);
|
|
337
|
+
}
|
|
338
|
+
} else {
|
|
339
|
+
if (!(data->writer.handle.csv = zsv_writer_new(NULL)))
|
|
340
|
+
data->status = zsv_compare_status_memory;
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
if (data->status == zsv_compare_status_ok) {
|
|
344
|
+
unsigned header_col_count = (data->key_count ? data->key_count : 1) + // match keys
|
|
345
|
+
2 + // column name and column value
|
|
346
|
+
data->input_count + // input names
|
|
347
|
+
data->added_colcount; // added columns
|
|
348
|
+
|
|
349
|
+
zsv_compare_allocate_properties(data, header_col_count);
|
|
350
|
+
|
|
351
|
+
// write header row
|
|
352
|
+
if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON && !data->writer.object)
|
|
353
|
+
jsonwriter_start_array(data->writer.handle.jsw);
|
|
354
|
+
|
|
355
|
+
// write keys
|
|
356
|
+
if (!data->keys) // id is effectively just row number
|
|
357
|
+
zsv_compare_header_str(data, (const unsigned char *)"Row #", ZSV_WRITER_NEW_ROW, 0);
|
|
358
|
+
else {
|
|
359
|
+
for (struct zsv_compare_key *key_name = data->keys; key_name; key_name = key_name->next)
|
|
360
|
+
zsv_compare_header_str(data, (const unsigned char *)key_name->name,
|
|
361
|
+
key_name == data->keys ? ZSV_WRITER_NEW_ROW : ZSV_WRITER_SAME_ROW, 1);
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
// write additional column names
|
|
365
|
+
for (struct zsv_compare_added_column *ac = data->added_columns; ac; ac = ac->next)
|
|
366
|
+
zsv_compare_header_str(data, ac->colname->name, ZSV_WRITER_SAME_ROW, 1);
|
|
367
|
+
|
|
368
|
+
// write "Column"
|
|
369
|
+
zsv_compare_header_str(data, (const unsigned char *)"Column", ZSV_WRITER_SAME_ROW, 0);
|
|
370
|
+
|
|
371
|
+
// write input name(s)
|
|
372
|
+
for (unsigned i = 0; i < data->input_count; i++) {
|
|
373
|
+
struct zsv_compare_input *input = &data->inputs[i];
|
|
374
|
+
zsv_compare_header_str(data, (const unsigned char *)input->path, ZSV_WRITER_SAME_ROW, 1);
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON && !data->writer.object)
|
|
378
|
+
jsonwriter_end_array(data->writer.handle.jsw);
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
static void zsv_compare_output_end(struct zsv_compare_data *data) {
|
|
383
|
+
if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) {
|
|
384
|
+
if (data->writer.handle.jsw)
|
|
385
|
+
jsonwriter_end(data->writer.handle.jsw);
|
|
386
|
+
} else {
|
|
387
|
+
zsv_writer_flush(data->writer.handle.csv);
|
|
388
|
+
}
|
|
389
|
+
if (data->status == zsv_compare_status_no_more_input)
|
|
390
|
+
data->status = zsv_compare_status_ok;
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
static enum zsv_status zsv_compare_next_unsorted_row(struct zsv_compare_input *input) {
|
|
394
|
+
return zsv_next_row(input->parser);
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
static struct zsv_cell zsv_compare_get_unsorted_cell(struct zsv_compare_input *input, unsigned ix) {
|
|
398
|
+
return zsv_get_cell_trimmed(input->parser, ix);
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
static unsigned zsv_compare_get_unsorted_colcount(struct zsv_compare_input *input) {
|
|
402
|
+
return zsv_cell_count(input->parser);
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
static enum zsv_compare_status input_init_unsorted(struct zsv_compare_data *data, struct zsv_compare_input *input,
|
|
406
|
+
struct zsv_opts *opts,
|
|
407
|
+
struct zsv_prop_handler *custom_prop_handler) {
|
|
408
|
+
if (!(input->stream = fopen(input->path, "rb"))) {
|
|
409
|
+
perror(input->path);
|
|
410
|
+
return zsv_compare_status_error;
|
|
411
|
+
}
|
|
412
|
+
struct zsv_opts these_opts = *opts;
|
|
413
|
+
these_opts.stream = input->stream;
|
|
414
|
+
enum zsv_status stat = zsv_new_with_properties(&these_opts, custom_prop_handler, input->path, &input->parser);
|
|
415
|
+
if (stat != zsv_status_ok)
|
|
416
|
+
return zsv_compare_status_error;
|
|
417
|
+
|
|
418
|
+
if (data->next_row(input) != zsv_status_row)
|
|
419
|
+
return zsv_compare_status_error;
|
|
420
|
+
|
|
421
|
+
return zsv_compare_status_ok;
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
zsv_compare_handle zsv_compare_new(void) {
|
|
425
|
+
zsv_compare_handle z = calloc(1, sizeof(*z));
|
|
426
|
+
#if defined(ZSV_COMPARE_CMP_FUNC) && defined(ZSV_COMPARE_CMP_CTX)
|
|
427
|
+
zsv_compare_set_comparison(z, ZSV_COMPARE_CMP_FUNC, ZSV_COMPARE_CMP_CTX);
|
|
428
|
+
#else
|
|
429
|
+
zsv_compare_set_comparison(z, zsv_compare_cell, NULL);
|
|
430
|
+
#endif
|
|
431
|
+
z->output_colnames_next = &z->output_colnames;
|
|
432
|
+
|
|
433
|
+
z->next_row = zsv_compare_next_unsorted_row;
|
|
434
|
+
z->get_cell = zsv_compare_get_unsorted_cell;
|
|
435
|
+
z->get_column_name = zsv_compare_get_unsorted_cell;
|
|
436
|
+
z->get_column_count = zsv_compare_get_unsorted_colcount;
|
|
437
|
+
z->input_init = input_init_unsorted;
|
|
438
|
+
return z;
|
|
439
|
+
}
|
|
440
|
+
|
|
441
|
+
static void zsv_compare_set_sorted_callbacks(struct zsv_compare_data *data) {
|
|
442
|
+
data->next_row = zsv_compare_next_sorted_row;
|
|
443
|
+
data->get_cell = zsv_compare_get_sorted_cell;
|
|
444
|
+
data->get_column_name = zsv_compare_get_sorted_colname;
|
|
445
|
+
data->get_column_count = zsv_compare_get_sorted_colcount;
|
|
446
|
+
data->input_init = input_init_sorted;
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
static enum zsv_compare_status zsv_compare_init_sorted(struct zsv_compare_data *data) {
|
|
450
|
+
int rc;
|
|
451
|
+
// to do: use sql_internal.h interface
|
|
452
|
+
const char *db_url = data->sort_in_memory ? "file::memory:" : "";
|
|
453
|
+
if ((rc = sqlite3_open_v2(db_url, &data->sort_db, SQLITE_OPEN_URI | SQLITE_OPEN_READWRITE, NULL)) == SQLITE_OK &&
|
|
454
|
+
data->sort_db && (rc = sqlite3_create_module(data->sort_db, "csv", &CsvModule, 0) == SQLITE_OK)) {
|
|
455
|
+
zsv_compare_set_sorted_callbacks(data);
|
|
456
|
+
return zsv_compare_status_ok;
|
|
457
|
+
}
|
|
458
|
+
return zsv_compare_status_error;
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
static void zsv_compare_data_free(struct zsv_compare_data *data) {
|
|
462
|
+
if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) {
|
|
463
|
+
if (data->writer.handle.jsw)
|
|
464
|
+
jsonwriter_delete(data->writer.handle.jsw);
|
|
465
|
+
} else
|
|
466
|
+
zsv_writer_delete(data->writer.handle.csv);
|
|
467
|
+
|
|
468
|
+
for (unsigned i = 0; i < data->input_count; i++)
|
|
469
|
+
zsv_compare_input_free(&data->inputs[i]);
|
|
470
|
+
free(data->inputs);
|
|
471
|
+
free(data->combined_key_names);
|
|
472
|
+
free(data->inputs_to_sort);
|
|
473
|
+
for (unsigned i = 0; i < data->writer.properties.used; i++)
|
|
474
|
+
free(data->writer.properties.names[i]);
|
|
475
|
+
free(data->writer.properties.names);
|
|
476
|
+
|
|
477
|
+
if (data->sort) {
|
|
478
|
+
if (data->sort_db)
|
|
479
|
+
sqlite3_close(data->sort_db);
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
zsv_compare_added_column_delete(data->added_columns);
|
|
483
|
+
|
|
484
|
+
zsv_compare_unique_colnames_delete(&data->output_colnames);
|
|
485
|
+
zsv_compare_unique_colnames_delete(&data->added_colnames);
|
|
486
|
+
|
|
487
|
+
for (struct zsv_compare_key *next, *key = data->keys; key; key = next) {
|
|
488
|
+
next = key->next;
|
|
489
|
+
free(key);
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
// Destroy a compare handle: release everything it owns, then the handle itself.
// Passing NULL is a no-op.
void zsv_compare_delete(zsv_compare_handle z) {
  if (!z)
    return;

  zsv_compare_data_free(z);
  free(z);
}
|
|
499
|
+
|
|
500
|
+
// Install the cell comparison function and its context pointer on `data`.
// Both values are stored as given; `cmp_ctx` is presumably handed back to `cmp`
// as its context argument when cells are compared (confirm at the call site).
void zsv_compare_set_comparison(struct zsv_compare_data *data, zsv_compare_cell_func cmp, void *cmp_ctx) {
  data->cmp_ctx = cmp_ctx;
  data->cmp = cmp;
}
|
|
504
|
+
|
|
505
|
+
// Default cell comparison: compares the two cells' bytes with zsv_strincmp
// (presumably case-insensitive, going by its name) and returns its result.
// The context, data and column index arguments are not used.
static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2, void *data, unsigned col_ix) {
  // silence unused-parameter warnings
  (void)ctx;
  (void)data;
  (void)col_ix;

  int diff = zsv_strincmp(c1.str, c1.len, c2.str, c2.len);
  return diff;
}
|
|
511
|
+
|
|
512
|
+
static enum zsv_compare_status zsv_compare_advance(struct zsv_compare_data *data) {
|
|
513
|
+
// advance each input (if not row_loaded) to their next row
|
|
514
|
+
char got = 0;
|
|
515
|
+
for (unsigned i = 0; i < data->input_count; i++) {
|
|
516
|
+
struct zsv_compare_input *input = &data->inputs[i];
|
|
517
|
+
if (input->done)
|
|
518
|
+
continue;
|
|
519
|
+
|
|
520
|
+
if (input->row_loaded) {
|
|
521
|
+
got = 1;
|
|
522
|
+
continue;
|
|
523
|
+
}
|
|
524
|
+
if (data->next_row(input) != zsv_status_row)
|
|
525
|
+
input->done = 1;
|
|
526
|
+
else {
|
|
527
|
+
for (unsigned idx = 0; idx < input->key_count; idx++)
|
|
528
|
+
input->keys[idx].value = data->get_cell(input, input->keys[idx].col_ix);
|
|
529
|
+
input->row_loaded = 1;
|
|
530
|
+
got = 1;
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
return got ? zsv_compare_status_ok : zsv_compare_status_no_more_input;
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
static int zsv_compare_inputp_cmp(const void *inputpx, const void *inputpy) {
|
|
537
|
+
struct zsv_compare_input *const *xp = inputpx;
|
|
538
|
+
struct zsv_compare_input *const *yp = inputpy;
|
|
539
|
+
const struct zsv_compare_input *x = *xp;
|
|
540
|
+
const struct zsv_compare_input *y = *yp;
|
|
541
|
+
|
|
542
|
+
if (!x->row_loaded && !y->row_loaded)
|
|
543
|
+
return 0;
|
|
544
|
+
if (!x->row_loaded)
|
|
545
|
+
return 1;
|
|
546
|
+
if (!y->row_loaded)
|
|
547
|
+
return -1;
|
|
548
|
+
|
|
549
|
+
int cmp = 0;
|
|
550
|
+
for (unsigned i = 0; !cmp && i < x->key_count && i < y->key_count; i++)
|
|
551
|
+
// for multibyte input, the input must be also sorted lexicographically
|
|
552
|
+
// to avoid potential mismatches
|
|
553
|
+
// see e.g. https://stackoverflow.com/questions/4611302/sorting-utf-8-strings
|
|
554
|
+
cmp = zsv_strincmp(x->keys[i].value.str, x->keys[i].value.len, y->keys[i].value.str, y->keys[i].value.len);
|
|
555
|
+
return cmp;
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
static enum zsv_compare_status zsv_compare_next(struct zsv_compare_data *data) {
|
|
559
|
+
data->status = zsv_compare_advance(data);
|
|
560
|
+
if (data->status != zsv_compare_status_ok)
|
|
561
|
+
return data->status;
|
|
562
|
+
|
|
563
|
+
data->row_count++;
|
|
564
|
+
// sort the inputs by ID value first, and input position second
|
|
565
|
+
// for as many inputs have the same smallest ID values, output them as a group
|
|
566
|
+
// and set input->row_loaded to 0
|
|
567
|
+
qsort(data->inputs_to_sort, data->input_count, sizeof(*data->inputs_to_sort), zsv_compare_inputp_cmp);
|
|
568
|
+
|
|
569
|
+
// find the next subset of inputs with identical id values and process those inputs
|
|
570
|
+
unsigned last = 0;
|
|
571
|
+
struct zsv_compare_input *min_input = data->inputs_to_sort[0];
|
|
572
|
+
for (unsigned tmp_i = 1; tmp_i < data->input_count; tmp_i++) {
|
|
573
|
+
struct zsv_compare_input *tmp = data->inputs_to_sort[tmp_i];
|
|
574
|
+
if (!tmp->row_loaded)
|
|
575
|
+
continue;
|
|
576
|
+
if (!zsv_compare_inputp_cmp(&min_input, &tmp)) { // keys are the same
|
|
577
|
+
last = tmp_i;
|
|
578
|
+
continue;
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
// keys are different
|
|
582
|
+
break;
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
// print row
|
|
586
|
+
zsv_compare_print_row(data, last);
|
|
587
|
+
|
|
588
|
+
// reset row_loaded
|
|
589
|
+
for (unsigned tmp = 0; tmp <= last; tmp++)
|
|
590
|
+
data->inputs_to_sort[tmp]->row_loaded = 0;
|
|
591
|
+
|
|
592
|
+
return zsv_compare_status_ok;
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
// Print the help text for the compare command to stdout, one entry per line.
// Always returns 0.
static int compare_usage(void) {
  // NULL-terminated list of help lines
  static const char *const help_text[] = {
    "Usage: compare [options] <file.csv>...",
    "",
    "Options:",
    " -h,--help : show usage",
    " -k,--key <colname> : specify a column to match rows on",
    " can be specified multiple times",
    " -a,--add <colname> : specify an additional column to output",
    " will use the [first input] source",
    " --sort : sort on keys before comparing",
    " --sort-in-memory : for sorting, use in-memory instead of temporary db",
    " (see https://www.sqlite.org/inmemorydb.html)",
    " --tolerance <value>: ignore differences where both values are numeric",
    " strings with values differing by less than the given",
    " amount e.g. --tolerance 0.01 will ignore differences",
    " of numeric strings such as 123.45 vs 123.44",
    " --json : output as JSON",
    " --json-compact : output as compact JSON",
    " --json-object : output as an array of objects",
    " --print-key-colname: when outputting key column diffs,",
    " print column name instead of <key>",
    " -e,--exit-code : return < 0 on error, else the number of differences found",
    "",
    "NOTES",
    "",
    " If no keys are specified, each row from each input is compared to the",
    " row in the corresponding position in each other input (all the first rows",
    " from each input are compared to each other, all the second rows are compared to",
    " each other, etc).",
    "",
    " If one or more key is specified, each input is assumed to already be",
    " lexicographically sorted in ascending order; this is a necessary condition",
    " for the output to be correct (unless the --sort option is used). However, it",
    " is not required for each input to contain the same population of row keys",
    "",
    " The --sort option uses sqlite3 (unindexed) sort and is intended to be a",
    " convenience rather than performance feature. If you need high performance",
    " sorting, other solutions, such as a multi-threaded parallel sort, are likely",
    " superior. For handling quoted data, `2tsv` can be used to convert to a delimited",
    " format without quotes, that can be directly parsed with common UNIX utilities",
    " (such as `sort`), and `select --unescape` can be used to convert back",
    NULL,
  };

  // puts() appends the newline that follows every help line
  for (const char *const *line = help_text; *line != NULL; line++)
    puts(*line);

  return 0;
}
|
|
645
|
+
|
|
646
|
+
// TO DO: consolidate w sql.c, move common code to utils/db.c
|
|
647
|
+
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts,
|
|
648
|
+
struct zsv_prop_handler *custom_prop_handler) {
|
|
649
|
+
// See sql.c re passing options to sqlite3 when sorting is used
|
|
650
|
+
if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
|
|
651
|
+
compare_usage();
|
|
652
|
+
return argc < 2 ? 1 : 0;
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
// temporarily hold the input file names
|
|
656
|
+
const char **input_filenames = calloc(argc, sizeof(*input_filenames));
|
|
657
|
+
if (!input_filenames)
|
|
658
|
+
return zsv_compare_status_memory;
|
|
659
|
+
|
|
660
|
+
zsv_compare_handle data = zsv_compare_new();
|
|
661
|
+
if (!data) {
|
|
662
|
+
free(input_filenames);
|
|
663
|
+
return zsv_compare_status_memory;
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
int err = 0;
|
|
667
|
+
// initialization starts here. to do: make this a separate function
|
|
668
|
+
unsigned input_count = 0;
|
|
669
|
+
struct zsv_compare_key **next_key = &data->keys;
|
|
670
|
+
struct zsv_compare_added_column **added_column_next = &data->added_columns;
|
|
671
|
+
for (int arg_i = 1; data->status == zsv_compare_status_ok && !err && arg_i < argc; arg_i++) {
|
|
672
|
+
const char *arg = argv[arg_i];
|
|
673
|
+
#include <zsv/utils/arg.h>
|
|
674
|
+
if (!strcmp(arg, "-k") || !strcmp(arg, "--key")) {
|
|
675
|
+
const char *next_arg = zsv_next_arg(++arg_i, argc, argv, &err);
|
|
676
|
+
if (next_arg) {
|
|
677
|
+
next_key = zsv_compare_key_add(next_key, next_arg, &err);
|
|
678
|
+
data->key_count++;
|
|
679
|
+
}
|
|
680
|
+
} else if (!strcmp(arg, "-a") || !strcmp(arg, "--add")) {
|
|
681
|
+
const char *next_arg = zsv_next_arg(++arg_i, argc, argv, &err);
|
|
682
|
+
if (next_arg) {
|
|
683
|
+
zsv_compare_unique_colname *colname;
|
|
684
|
+
if ((data->status = zsv_compare_unique_colname_add(&data->added_colnames, (const unsigned char *)next_arg,
|
|
685
|
+
strlen(next_arg), &colname)) == zsv_compare_status_ok) {
|
|
686
|
+
// add to linked list for use after all data->output_colnames are allocated
|
|
687
|
+
added_column_next = zsv_compare_added_column_add(added_column_next, colname, &data->status);
|
|
688
|
+
if (data->status == zsv_compare_status_ok)
|
|
689
|
+
data->added_colcount++;
|
|
690
|
+
}
|
|
691
|
+
}
|
|
692
|
+
} else if (!strcmp(arg, "--tolerance")) {
|
|
693
|
+
const char *next_arg = zsv_next_arg(++arg_i, argc, argv, &err);
|
|
694
|
+
if (next_arg) {
|
|
695
|
+
if (zsv_strtod_exact(next_arg, &data->tolerance.value))
|
|
696
|
+
fprintf(stderr, "Invalid numeric value: %s\n", next_arg), err = 1;
|
|
697
|
+
else if (data->tolerance.value < 0)
|
|
698
|
+
fprintf(stderr, "Tolerance must be greater than zero (got %s)\n", next_arg), err = 1;
|
|
699
|
+
else
|
|
700
|
+
data->tolerance.value = nextafterf(data->tolerance.value, INFINITY);
|
|
701
|
+
}
|
|
702
|
+
} else if (!strcmp(arg, "--sort")) {
|
|
703
|
+
data->sort = 1;
|
|
704
|
+
} else if (!strcmp(arg, "--exit-code") || !strcmp(arg, "-e")) {
|
|
705
|
+
data->return_count = 1;
|
|
706
|
+
} else if (!strcmp(arg, "--json")) {
|
|
707
|
+
data->writer.type = ZSV_COMPARE_OUTPUT_TYPE_JSON;
|
|
708
|
+
} else if (!strcmp(arg, "--json-object")) {
|
|
709
|
+
data->writer.type = ZSV_COMPARE_OUTPUT_TYPE_JSON;
|
|
710
|
+
data->writer.object = 1;
|
|
711
|
+
} else if (!strcmp(arg, "--json-compact")) {
|
|
712
|
+
data->writer.type = ZSV_COMPARE_OUTPUT_TYPE_JSON;
|
|
713
|
+
data->writer.compact = 1;
|
|
714
|
+
} else if (!strcmp(arg, "--print-key-colname")) {
|
|
715
|
+
data->print_key_col_names = 1;
|
|
716
|
+
} else
|
|
717
|
+
input_filenames[input_count++] = arg;
|
|
718
|
+
}
|
|
719
|
+
|
|
720
|
+
struct zsv_opts original_default_opts;
|
|
721
|
+
struct zsv_prop_handler original_default_custom_prop_handler;
|
|
722
|
+
if (data->sort) {
|
|
723
|
+
if (!data->key_count) {
|
|
724
|
+
fprintf(stderr, "Error: --sort requires one or more keys\n");
|
|
725
|
+
data->status = zsv_compare_status_error;
|
|
726
|
+
} else {
|
|
727
|
+
original_default_opts = zsv_get_default_opts();
|
|
728
|
+
zsv_set_default_opts(*opts);
|
|
729
|
+
|
|
730
|
+
if (custom_prop_handler) {
|
|
731
|
+
original_default_custom_prop_handler = zsv_get_default_custom_prop_handler();
|
|
732
|
+
zsv_set_default_custom_prop_handler(*custom_prop_handler);
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
if (data->status == zsv_compare_status_ok)
|
|
736
|
+
data->status = zsv_compare_init_sorted(data);
|
|
737
|
+
}
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
if (err && data->status == zsv_compare_status_ok)
|
|
741
|
+
data->status = zsv_compare_status_error;
|
|
742
|
+
else if (!input_count)
|
|
743
|
+
data->status = zsv_compare_status_error;
|
|
744
|
+
else if (data->status == zsv_compare_status_ok) {
|
|
745
|
+
if ((data->status = zsv_compare_set_inputs(data, input_count)) == zsv_compare_status_ok) {
|
|
746
|
+
// initialize parsers
|
|
747
|
+
for (unsigned ix = 0; data->status == zsv_compare_status_ok && ix < input_count; ix++) {
|
|
748
|
+
struct zsv_compare_input *input = &data->inputs[ix];
|
|
749
|
+
input->path = input_filenames[ix];
|
|
750
|
+
data->status = data->input_init(data, input, opts, custom_prop_handler);
|
|
751
|
+
}
|
|
752
|
+
}
|
|
753
|
+
|
|
754
|
+
if (data->status == zsv_compare_status_ok) {
|
|
755
|
+
// find keys
|
|
756
|
+
for (unsigned i = 0; data->status == zsv_compare_status_ok && i < data->input_count; i++) {
|
|
757
|
+
struct zsv_compare_input *input = &data->inputs[i];
|
|
758
|
+
if ((input->col_count = data->get_column_count(input))) {
|
|
759
|
+
if (!(input->output_colnames = calloc(input->col_count, sizeof(*input->output_colnames)))) {
|
|
760
|
+
data->status = zsv_compare_status_memory;
|
|
761
|
+
break;
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
|
|
765
|
+
unsigned found_keys = 0;
|
|
766
|
+
for (unsigned j = 0; j < input->col_count && !input->done && data->status == zsv_compare_status_ok; j++) {
|
|
767
|
+
struct zsv_cell colname = data->get_column_name(input, j);
|
|
768
|
+
const unsigned char *colname_s = colname.str;
|
|
769
|
+
unsigned colname_len = colname.len;
|
|
770
|
+
zsv_compare_unique_colname *input_col;
|
|
771
|
+
data->status = zsv_compare_unique_colname_add(&input->colnames, colname_s, colname_len, &input_col);
|
|
772
|
+
if (data->status != zsv_compare_status_ok)
|
|
773
|
+
break;
|
|
774
|
+
|
|
775
|
+
if (input_col) {
|
|
776
|
+
// now that we know this colname+instance_num is unique to this input
|
|
777
|
+
// check if it is a key
|
|
778
|
+
for (unsigned key_ix = 0; found_keys < input->key_count && key_ix < input->key_count; key_ix++) {
|
|
779
|
+
struct zsv_compare_input_key *k = &input->keys[key_ix];
|
|
780
|
+
if (!k->found &&
|
|
781
|
+
!zsv_strincmp(colname_s, colname_len, (const unsigned char *)k->key->name, strlen(k->key->name))) {
|
|
782
|
+
k->found = 1;
|
|
783
|
+
found_keys++;
|
|
784
|
+
k->col_ix = j;
|
|
785
|
+
input_col->is_key = 1;
|
|
786
|
+
break;
|
|
787
|
+
}
|
|
788
|
+
}
|
|
789
|
+
|
|
790
|
+
// add it to the output
|
|
791
|
+
int added = 0;
|
|
792
|
+
zsv_compare_unique_colname *output_col = zsv_compare_unique_colname_add_if_not_found(
|
|
793
|
+
&data->output_colnames, colname_s, colname_len, input_col->instance_num, &added);
|
|
794
|
+
if (!output_col) // error
|
|
795
|
+
data->status = zsv_compare_status_error;
|
|
796
|
+
else {
|
|
797
|
+
if (added) {
|
|
798
|
+
if (*data->output_colnames_next)
|
|
799
|
+
(*data->output_colnames_next)->next = output_col;
|
|
800
|
+
if (!data->output_colnames_first)
|
|
801
|
+
data->output_colnames_first = output_col;
|
|
802
|
+
|
|
803
|
+
*data->output_colnames_next = output_col;
|
|
804
|
+
output_col->is_key = input_col->is_key;
|
|
805
|
+
data->output_colnames_next = &output_col->next;
|
|
806
|
+
output_col->output_ix = data->output_colcount++;
|
|
807
|
+
}
|
|
808
|
+
input->output_colnames[j] = output_col;
|
|
809
|
+
}
|
|
810
|
+
}
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
if (found_keys != data->key_count) {
|
|
814
|
+
fprintf(stderr, "Unable to find the following keys in %s: ", input->path);
|
|
815
|
+
for (unsigned int j = 0; j < input->key_count; j++) {
|
|
816
|
+
struct zsv_compare_input_key *k = &input->keys[j];
|
|
817
|
+
if (!k->found)
|
|
818
|
+
fprintf(stderr, "\n %s", k->key->name);
|
|
819
|
+
}
|
|
820
|
+
fprintf(stderr, "\n");
|
|
821
|
+
data->status = zsv_compare_status_error;
|
|
822
|
+
}
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
if (data->status == zsv_compare_status_ok) {
|
|
827
|
+
if (data->output_colcount == 0)
|
|
828
|
+
data->status = zsv_compare_status_no_data;
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
char started = 0;
|
|
832
|
+
if (data->status == zsv_compare_status_ok) {
|
|
833
|
+
started = 1;
|
|
834
|
+
zsv_compare_output_begin(data);
|
|
835
|
+
|
|
836
|
+
// match output colnames to added columns
|
|
837
|
+
for (struct zsv_compare_added_column *ac = data->added_columns; ac; ac = ac->next) {
|
|
838
|
+
zsv_compare_unique_colname col = {0};
|
|
839
|
+
col.name = ac->colname->name;
|
|
840
|
+
col.name_len = ac->colname->name_len;
|
|
841
|
+
col.instance_num = ac->colname->instance_num;
|
|
842
|
+
ac->output_colname = sglib_zsv_compare_unique_colname_find_member(data->output_colnames, &col);
|
|
843
|
+
if (!ac->output_colname)
|
|
844
|
+
fprintf(stderr, "Warning: added column %.*s not found in any input\n", (int)col.name_len, col.name);
|
|
845
|
+
}
|
|
846
|
+
|
|
847
|
+
// assign out2in mappings
|
|
848
|
+
for (unsigned i = 0; data->status == zsv_compare_status_ok && i < data->input_count; i++) {
|
|
849
|
+
struct zsv_compare_input *input = &data->inputs[i];
|
|
850
|
+
if (input->done)
|
|
851
|
+
continue;
|
|
852
|
+
if (!(input->out2in = calloc(data->output_colcount, sizeof(*input->out2in))))
|
|
853
|
+
data->status = zsv_compare_status_memory;
|
|
854
|
+
else {
|
|
855
|
+
for (unsigned j = 0; j < input->col_count; j++) {
|
|
856
|
+
zsv_compare_unique_colname *output_col = input->output_colnames[j];
|
|
857
|
+
if (output_col) {
|
|
858
|
+
input->out2in[output_col->output_ix] = j + 1;
|
|
859
|
+
|
|
860
|
+
// check if this should be the source of any additional columns
|
|
861
|
+
for (struct zsv_compare_added_column *ac = data->added_columns; ac; ac = ac->next) {
|
|
862
|
+
if (!ac->input && ac->output_colname) {
|
|
863
|
+
if (output_col == ac->output_colname) {
|
|
864
|
+
ac->input = input;
|
|
865
|
+
ac->col_ix = j;
|
|
866
|
+
}
|
|
867
|
+
}
|
|
868
|
+
}
|
|
869
|
+
}
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
}
|
|
873
|
+
}
|
|
874
|
+
|
|
875
|
+
// assertions
|
|
876
|
+
if (data->status == zsv_compare_status_ok) {
|
|
877
|
+
int ok = 0;
|
|
878
|
+
for (unsigned i = 0; i < data->input_count; i++)
|
|
879
|
+
if (!data->inputs[i].done)
|
|
880
|
+
ok++;
|
|
881
|
+
|
|
882
|
+
if (ok < 2) {
|
|
883
|
+
fprintf(stderr, "Compare requires at least two non-empty inputs\n");
|
|
884
|
+
data->status = zsv_compare_status_error;
|
|
885
|
+
}
|
|
886
|
+
}
|
|
887
|
+
|
|
888
|
+
// next, compare each row
|
|
889
|
+
while (data->status == zsv_compare_status_ok && zsv_compare_next(data) == zsv_compare_status_ok)
|
|
890
|
+
;
|
|
891
|
+
if (started)
|
|
892
|
+
zsv_compare_output_end(data);
|
|
893
|
+
}
|
|
894
|
+
|
|
895
|
+
free(input_filenames);
|
|
896
|
+
|
|
897
|
+
err = data->status == zsv_compare_status_ok ? 0 : 1;
|
|
898
|
+
|
|
899
|
+
if (data->sort) {
|
|
900
|
+
zsv_set_default_opts(original_default_opts); // restore default options
|
|
901
|
+
if (custom_prop_handler)
|
|
902
|
+
zsv_set_default_custom_prop_handler(original_default_custom_prop_handler);
|
|
903
|
+
}
|
|
904
|
+
|
|
905
|
+
if (data->return_count) {
|
|
906
|
+
if (err)
|
|
907
|
+
err = -1;
|
|
908
|
+
else
|
|
909
|
+
err = data->diff_count;
|
|
910
|
+
}
|
|
911
|
+
zsv_compare_delete(data);
|
|
912
|
+
return err;
|
|
913
|
+
}
|