zsv 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +36 -0
- data/LICENSE +21 -0
- data/README.md +311 -0
- data/ext/zsv/common.h +34 -0
- data/ext/zsv/extconf.rb +137 -0
- data/ext/zsv/options.c +126 -0
- data/ext/zsv/options.h +31 -0
- data/ext/zsv/options_internal.h +8 -0
- data/ext/zsv/parser.c +300 -0
- data/ext/zsv/parser.h +62 -0
- data/ext/zsv/row.c +122 -0
- data/ext/zsv/row.h +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
- data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
- data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
- data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
- data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
- data/ext/zsv/zsv_ext.c +343 -0
- data/lib/zsv/version.rb +5 -0
- data/lib/zsv.rb +81 -0
- metadata +340 -0
|
@@ -0,0 +1,851 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2021 Liquidaty and zsv contributors. All rights reserved.
|
|
3
|
+
*
|
|
4
|
+
* This file is part of zsv/lib, distributed under the MIT license as defined at
|
|
5
|
+
* https://opensource.org/licenses/MIT
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <stdio.h>
|
|
9
|
+
#include <stdlib.h>
|
|
10
|
+
#include <string.h>
|
|
11
|
+
#include <sglib.h>
|
|
12
|
+
|
|
13
|
+
#include <unistd.h> // unlink
|
|
14
|
+
|
|
15
|
+
#define ZSV_COMMAND flatten
|
|
16
|
+
#include "zsv_command.h"
|
|
17
|
+
|
|
18
|
+
#include <zsv/utils/writer.h>
|
|
19
|
+
#include <zsv/utils/file.h>
|
|
20
|
+
#include <zsv/utils/utf8.h>
|
|
21
|
+
#include <zsv/utils/mem.h>
|
|
22
|
+
#include <zsv/utils/string.h>
|
|
23
|
+
#include <memfile.h>
|
|
24
|
+
#include <jsonwriter.h>
|
|
25
|
+
|
|
26
|
+
// How the values collected for an aggregated output column are combined.
// Numbering starts at 1, so a zero-initialized field matches none of these.
enum flatten_agg_method {
  flatten_agg_method_none = 1, // no aggregation
  flatten_agg_method_delim,    // values joined with a delimiter
  flatten_agg_method_json      // values written as a JSON array
};
|
|
31
|
+
|
|
32
|
+
// A column identified by name, together with the position at which it was found.
struct flatten_column_name_and_ix {
  unsigned char *name;         // column name
  size_t name_len;             // length of name, in bytes
  unsigned int ix_plus_1;      // column index + 1; 0 means "not located yet"
  unsigned char free_name : 1; // set by set_cnx() when it allocated name itself
  unsigned char dummy : 7;     // unused bits
};
|
|
39
|
+
|
|
40
|
+
// Singly linked list node holding one heap-allocated value.
struct chars_list {
  struct chars_list *next; // following node, or NULL at the end of the list
  unsigned char *value;    // this node's value; may be NULL
};
|
|
44
|
+
|
|
45
|
+
static struct chars_list *chars_list_new(const unsigned char *utf8_value, size_t len) {
|
|
46
|
+
struct chars_list *e = calloc(1, sizeof(*e));
|
|
47
|
+
if (e)
|
|
48
|
+
e->value = zsv_memdup(utf8_value, len);
|
|
49
|
+
return e;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
#ifndef FREEIF
// Free x and reset it to NULL.
// The do/while(0) wrapper makes the expansion a single statement, so the macro
// is safe in an unbraced if/else (a bare `if (x) ...` expansion would capture a
// following `else`). free(NULL) is a no-op, so no guard is needed.
// Note: x is evaluated twice; do not pass an expression with side effects.
#define FREEIF(x) \
  do { \
    free(x); \
    (x) = NULL; \
  } while (0)
#endif
|
|
57
|
+
|
|
58
|
+
static void chars_lists_delete(struct chars_list **p) {
|
|
59
|
+
if (p && *p) {
|
|
60
|
+
struct chars_list *next;
|
|
61
|
+
for (struct chars_list *e = *p; e; e = next) {
|
|
62
|
+
next = e->next;
|
|
63
|
+
FREEIF(e->value);
|
|
64
|
+
free(e);
|
|
65
|
+
}
|
|
66
|
+
*p = NULL;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
struct flatten_agg_col {
|
|
71
|
+
struct flatten_agg_col *next;
|
|
72
|
+
struct flatten_column_name_and_ix column;
|
|
73
|
+
struct chars_list *values, **last_value;
|
|
74
|
+
enum flatten_agg_method agg_method;
|
|
75
|
+
unsigned char *delimiter;
|
|
76
|
+
};
|
|
77
|
+
|
|
78
|
+
// Cursor over the values collected for a struct flatten_agg_col.
struct flatten_agg_col_iterator {
  unsigned char *str; // value at the current position
  size_t len;         // zeroed by flatten_agg_col_iterator_init()

  // internal use only
  struct chars_list *current_cl; // current list node; NULL once iteration is done
};
|
|
85
|
+
|
|
86
|
+
static void flatten_agg_col_iterator_init(struct flatten_agg_col *c, struct flatten_agg_col_iterator *i) {
|
|
87
|
+
memset(i, 0, sizeof(*i));
|
|
88
|
+
switch (c->agg_method) {
|
|
89
|
+
case flatten_agg_method_json:
|
|
90
|
+
case flatten_agg_method_delim:
|
|
91
|
+
if ((i->current_cl = c->values))
|
|
92
|
+
i->str = i->current_cl->value;
|
|
93
|
+
break;
|
|
94
|
+
default:
|
|
95
|
+
break;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
static void flatten_agg_col_iterator_replace_str(struct flatten_agg_col_iterator *i, unsigned char **new_s) {
|
|
100
|
+
if (i->current_cl)
|
|
101
|
+
i->current_cl->value = *new_s;
|
|
102
|
+
else {
|
|
103
|
+
fprintf(stderr, "flatten_agg_col_iterator_replace_str() error: no current value to replace\n");
|
|
104
|
+
free(*new_s);
|
|
105
|
+
*new_s = NULL;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
static void flatten_agg_col_iterator_next(struct flatten_agg_col_iterator *i) {
|
|
110
|
+
if (i->current_cl && (i->current_cl = i->current_cl->next))
|
|
111
|
+
i->str = i->current_cl->value;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
static char flatten_agg_col_iterator_done(struct flatten_agg_col_iterator *i) {
|
|
115
|
+
return i->current_cl ? 0 : 1;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
static const unsigned char *flatten_agg_col_delimiter(struct flatten_agg_col *c) {
|
|
119
|
+
if (c->delimiter)
|
|
120
|
+
return c->delimiter;
|
|
121
|
+
switch (c->agg_method) {
|
|
122
|
+
case flatten_agg_method_json:
|
|
123
|
+
return NULL;
|
|
124
|
+
case flatten_agg_method_none:
|
|
125
|
+
case flatten_agg_method_delim:
|
|
126
|
+
return (const unsigned char *)"|";
|
|
127
|
+
}
|
|
128
|
+
return (const unsigned char *)"|";
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
static void flatten_agg_col_add_value(struct flatten_agg_col *c, const unsigned char *utf8_value, size_t len) {
|
|
132
|
+
if (!c->last_value)
|
|
133
|
+
c->last_value = &c->values;
|
|
134
|
+
struct chars_list *e = chars_list_new(utf8_value, len);
|
|
135
|
+
if (e) {
|
|
136
|
+
*c->last_value = e;
|
|
137
|
+
c->last_value = &e->next;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
// An output column. Each node is a member of both a linked list (via next) and
// an sglib red-black tree (via left/right/color).
typedef struct flatten_output_column {
  struct flatten_output_column *next; // next column in the linked list
  unsigned char *name;                // column name
  size_t name_len;                    // length of name, in bytes
  unsigned char *compare_name;        // same as name, unless case-insensitive in which case, lower case
  unsigned char *current_value;       // value held for the row being assembled; may be NULL

  // red-black tree links
  struct flatten_output_column *left;
  struct flatten_output_column *right;
  unsigned char color : 1;
  unsigned char dummy : 7; // unused bits
} flatten_output_column;
|
|
153
|
+
|
|
154
|
+
void flatten_output_column_free(struct flatten_output_column *e) {
|
|
155
|
+
FREEIF(e->name);
|
|
156
|
+
FREEIF(e->compare_name);
|
|
157
|
+
FREEIF(e->current_value);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
// Ordering function for the output-column tree: strcmp() of the two compare_name strings.
int flatten_output_column_compare(flatten_output_column *x, flatten_output_column *y) {
  const char *lhs = (char *)x->compare_name;
  const char *rhs = (char *)y->compare_name;
  return strcmp(lhs, rhs);
}
|
|
163
|
+
|
|
164
|
+
// Declare the sglib red-black tree API for flatten_output_column nodes, linked through left/right/color and ordered by flatten_output_column_compare().
SGLIB_DEFINE_RBTREE_PROTOTYPES(flatten_output_column, left, right, color, flatten_output_column_compare);
|
|
165
|
+
|
|
166
|
+
// Define the sglib red-black tree functions for flatten_output_column (the sglib_flatten_output_column_* family used in this file).
SGLIB_DEFINE_RBTREE_FUNCTIONS(flatten_output_column, left, right, color, flatten_output_column_compare);
|
|
167
|
+
|
|
168
|
+
struct flatten_data {
|
|
169
|
+
unsigned int max_cols;
|
|
170
|
+
unsigned int output_column_total_count;
|
|
171
|
+
|
|
172
|
+
struct flatten_output_column *output_columns_by_value;
|
|
173
|
+
|
|
174
|
+
// output_columns_by_value, linked list
|
|
175
|
+
struct flatten_output_column *output_columns_by_value_head;
|
|
176
|
+
struct flatten_output_column **output_columns_by_value_tail;
|
|
177
|
+
|
|
178
|
+
unsigned int current_column_index;
|
|
179
|
+
unsigned int row_count;
|
|
180
|
+
unsigned int row_count2;
|
|
181
|
+
unsigned int output_row;
|
|
182
|
+
|
|
183
|
+
struct flatten_column_name_and_ix row_id_column;
|
|
184
|
+
struct flatten_column_name_and_ix column_name_column;
|
|
185
|
+
struct flatten_column_name_and_ix value_column;
|
|
186
|
+
|
|
187
|
+
struct flatten_output_column *current_column_name_column;
|
|
188
|
+
unsigned char *current_column_name_value;
|
|
189
|
+
|
|
190
|
+
unsigned char *last_asset_id;
|
|
191
|
+
size_t last_asset_id_len;
|
|
192
|
+
unsigned char *current_asset_id; // will equal last_asset_id if they are the same
|
|
193
|
+
size_t current_asset_id_len;
|
|
194
|
+
|
|
195
|
+
const char *output_filename;
|
|
196
|
+
|
|
197
|
+
FILE *in;
|
|
198
|
+
FILE *out;
|
|
199
|
+
const char *input_path;
|
|
200
|
+
|
|
201
|
+
zsv_csv_writer csv_writer;
|
|
202
|
+
|
|
203
|
+
struct flatten_agg_col *agg_output_cols;
|
|
204
|
+
struct flatten_agg_col **agg_output_cols_vector;
|
|
205
|
+
unsigned int agg_output_cols_vector_size;
|
|
206
|
+
|
|
207
|
+
int max_rows_per_aggregation;
|
|
208
|
+
|
|
209
|
+
// for json output: jsw and memfile
|
|
210
|
+
jsonwriter_handle jsw;
|
|
211
|
+
memfile_t memfile;
|
|
212
|
+
|
|
213
|
+
enum flatten_agg_method all_aggregation_method;
|
|
214
|
+
|
|
215
|
+
unsigned char cancelled : 1;
|
|
216
|
+
unsigned char verbose : 1;
|
|
217
|
+
unsigned char have_agg : 1;
|
|
218
|
+
unsigned char dummy : 5;
|
|
219
|
+
};
|
|
220
|
+
|
|
221
|
+
static int flatten_output_column_add(struct flatten_data *data, const unsigned char *utf8_value, size_t len,
|
|
222
|
+
unsigned char *compare_name) {
|
|
223
|
+
if (data->output_column_total_count == data->max_cols) {
|
|
224
|
+
free(compare_name);
|
|
225
|
+
return zsv_printerr(1, "ERROR: Maximum number of columns (%i) exceeded", data->max_cols);
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
struct flatten_output_column *new_output_column = calloc(1, sizeof(*new_output_column));
|
|
229
|
+
new_output_column->name = zsv_memdup(utf8_value, len);
|
|
230
|
+
new_output_column->name_len = len;
|
|
231
|
+
new_output_column->compare_name = compare_name;
|
|
232
|
+
|
|
233
|
+
// add to rbtree
|
|
234
|
+
sglib_flatten_output_column_add(&data->output_columns_by_value, new_output_column);
|
|
235
|
+
|
|
236
|
+
// also add to linked list
|
|
237
|
+
*data->output_columns_by_value_tail = new_output_column;
|
|
238
|
+
data->output_columns_by_value_tail = &new_output_column->next;
|
|
239
|
+
data->output_column_total_count++;
|
|
240
|
+
return 0;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
static flatten_output_column *flatten_output_column_find(struct flatten_data *data, const unsigned char *utf8_value,
|
|
244
|
+
size_t len, unsigned char **compare_name) {
|
|
245
|
+
flatten_output_column node, *found;
|
|
246
|
+
node.compare_name = zsv_strtolowercase(utf8_value, &len);
|
|
247
|
+
if (node.compare_name) {
|
|
248
|
+
if ((found = sglib_flatten_output_column_find_member(data->output_columns_by_value, &node))) {
|
|
249
|
+
free(node.compare_name);
|
|
250
|
+
return found;
|
|
251
|
+
}
|
|
252
|
+
// not found
|
|
253
|
+
if (compare_name)
|
|
254
|
+
*compare_name = node.compare_name;
|
|
255
|
+
else
|
|
256
|
+
free(node.compare_name);
|
|
257
|
+
}
|
|
258
|
+
return NULL;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
static void set_cnx(struct flatten_column_name_and_ix *cnx, const unsigned char *utf8_value, size_t len,
|
|
262
|
+
unsigned int current_column_ix) {
|
|
263
|
+
if (!cnx->ix_plus_1) {
|
|
264
|
+
if (!cnx->name) { // none provided, assume its the next column
|
|
265
|
+
if ((cnx->name = zsv_memdup(utf8_value, len))) {
|
|
266
|
+
cnx->free_name = 1;
|
|
267
|
+
cnx->name_len = len;
|
|
268
|
+
}
|
|
269
|
+
cnx->ix_plus_1 = current_column_ix + 1;
|
|
270
|
+
} else if (!zsv_strincmp(cnx->name, len, utf8_value, len))
|
|
271
|
+
cnx->ix_plus_1 = current_column_ix + 1;
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
// flatten_cell1(): for any value in the "column name" column, add it to the list of columns
|
|
276
|
+
static void flatten_cell1(void *hook, unsigned char *utf8_value, size_t len) {
|
|
277
|
+
struct flatten_data *data = hook;
|
|
278
|
+
if (!data->cancelled) {
|
|
279
|
+
if (data->row_count == 0) {
|
|
280
|
+
struct flatten_column_name_and_ix *cnxlist[] = {&data->row_id_column, &data->column_name_column,
|
|
281
|
+
&data->value_column};
|
|
282
|
+
for (unsigned int i = 0; i < 3; i++)
|
|
283
|
+
if (cnxlist[i]->name || (!data->have_agg && i == data->current_column_index))
|
|
284
|
+
set_cnx(cnxlist[i], utf8_value, len, data->current_column_index);
|
|
285
|
+
} else if (data->current_column_index + 1 == data->column_name_column.ix_plus_1) {
|
|
286
|
+
// we are in the "column name" column, so make sure we've added this to our columns to output
|
|
287
|
+
unsigned char *compare_name = NULL;
|
|
288
|
+
if (!flatten_output_column_find(data, utf8_value, len, &compare_name) && compare_name)
|
|
289
|
+
data->cancelled = flatten_output_column_add(data, utf8_value, len, compare_name);
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
data->current_column_index++;
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
static void flatten_row1(void *hook) {
|
|
296
|
+
struct flatten_data *data = hook;
|
|
297
|
+
if (data->cancelled)
|
|
298
|
+
return;
|
|
299
|
+
data->row_count++;
|
|
300
|
+
data->current_column_index = 0;
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
static void flatten_cell2(void *hook, unsigned char *utf8_value, size_t len) {
|
|
304
|
+
struct flatten_data *data = hook;
|
|
305
|
+
if (!data->cancelled) {
|
|
306
|
+
if (data->row_count2 == 0) {
|
|
307
|
+
if (!data->row_id_column.ix_plus_1)
|
|
308
|
+
if (data->row_id_column.name || !data->have_agg)
|
|
309
|
+
set_cnx(&data->row_id_column, utf8_value, len, data->current_column_index);
|
|
310
|
+
|
|
311
|
+
for (struct flatten_agg_col *c = data->agg_output_cols; c; c = c->next) {
|
|
312
|
+
if (c->column.name_len == len && !zsv_strincmp(c->column.name, len, utf8_value, len))
|
|
313
|
+
c->column.ix_plus_1 = data->current_column_index + 1;
|
|
314
|
+
}
|
|
315
|
+
} else {
|
|
316
|
+
if (data->current_column_index < data->agg_output_cols_vector_size) {
|
|
317
|
+
struct flatten_agg_col *c = data->agg_output_cols_vector[data->current_column_index];
|
|
318
|
+
if (c)
|
|
319
|
+
flatten_agg_col_add_value(c, utf8_value, len);
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
if (data->current_column_index + 1 == data->column_name_column.ix_plus_1) // column name
|
|
323
|
+
data->current_column_name_column = flatten_output_column_find(data, utf8_value, len, NULL);
|
|
324
|
+
|
|
325
|
+
else if (data->current_column_index + 1 == data->value_column.ix_plus_1) // value
|
|
326
|
+
data->current_column_name_value = zsv_memdup(utf8_value, len);
|
|
327
|
+
|
|
328
|
+
else if (data->current_column_index + 1 == data->row_id_column.ix_plus_1) { // asset ID
|
|
329
|
+
if (!data->last_asset_id) { // no prior asset, so this is the first one
|
|
330
|
+
data->last_asset_id = data->current_asset_id = zsv_memdup(utf8_value, len);
|
|
331
|
+
data->last_asset_id_len = len;
|
|
332
|
+
} else if (len != data->last_asset_id_len || memcmp(data->last_asset_id, utf8_value, len)) {
|
|
333
|
+
// this is a different asset from the last one
|
|
334
|
+
data->current_asset_id = zsv_memdup(utf8_value, len);
|
|
335
|
+
data->current_asset_id_len = len;
|
|
336
|
+
} else { // same as last asset
|
|
337
|
+
data->current_asset_id = data->last_asset_id;
|
|
338
|
+
data->current_asset_id_len = data->last_asset_id_len;
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
data->current_column_index++;
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
static void flatten_output_header(struct flatten_data *data) {
|
|
347
|
+
zsv_writer_cell(data->csv_writer, 1, data->row_id_column.name, data->row_id_column.name_len, 1);
|
|
348
|
+
unsigned int i = 1;
|
|
349
|
+
for (struct flatten_output_column *col = data->output_columns_by_value_head; col; col = col->next, i++) {
|
|
350
|
+
zsv_writer_cell(data->csv_writer, 0, col->name, col->name_len, 1);
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
for (struct flatten_agg_col *c = data->agg_output_cols; c; c = c->next)
|
|
354
|
+
zsv_writer_cell(data->csv_writer, !i++, c->column.name, c->column.name_len, 1);
|
|
355
|
+
data->output_row = 1;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
// Neutralize occurrences of `delimiter` inside the NUL-terminated string `inout`
// so the value can later be joined with that delimiter.
//
// The string is scanned one UTF-8 character at a time. A character at which the
// delimiter begins is replaced by the single byte `replacement`; a truncated
// trailing character is replaced the same way; everything else is kept.
//
// inout must be a writable heap string owned by the caller. The rewrite is done
// in place (the result is never longer than the input, so no allocation is
// needed and nothing can fail), and the returned pointer, which is inout
// itself, is the one the caller must use from then on.
//
// Returns NULL if inout is NULL. Returns inout unchanged if the delimiter is
// NULL, empty, or does not occur.
static unsigned char *flatten_replace_delim(unsigned char *inout, const unsigned char *delimiter, char replacement) {
  if (!inout)
    return NULL;

  // an empty delimiter "matches" everywhere; treat it as nothing to replace
  if (!delimiter || !*delimiter || !strstr((char *)inout, (char *)delimiter))
    return inout;

  const size_t delim_len = strlen((const char *)delimiter);
  const size_t in_len = strlen((const char *)inout);
  size_t out_len = 0; // invariant: out_len <= i, so writes never overtake reads

  for (size_t i = 0; i < in_len;) {
    int clen = ZSV_UTF8_CHARLEN_NOERR((int)inout[i]);
    if (clen < 1) // defensive: always make progress
      clen = 1;

    if (i + (size_t)clen <= in_len && strncmp((const char *)inout + i, (const char *)delimiter, delim_len)) {
      // complete character that does not start a delimiter: keep it
      memmove(inout + out_len, inout + i, (size_t)clen);
      out_len += (size_t)clen;
    } else
      inout[out_len++] = (unsigned char)replacement;

    i += (size_t)clen;
  }
  inout[out_len] = '\0';
  return inout;
}
|
|
383
|
+
|
|
384
|
+
static void output_current_row(struct flatten_data *data) {
|
|
385
|
+
if (data->last_asset_id) {
|
|
386
|
+
data->output_row++;
|
|
387
|
+
zsv_writer_cell(data->csv_writer, 1, data->last_asset_id, data->last_asset_id_len, 1);
|
|
388
|
+
for (struct flatten_output_column *col = data->output_columns_by_value_head; col; col = col->next) {
|
|
389
|
+
zsv_writer_cell(data->csv_writer, 0, col->current_value,
|
|
390
|
+
col->current_value ? strlen((char *)col->current_value) : 0, 1);
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
for (struct flatten_agg_col *c = data->agg_output_cols; c; c = c->next) {
|
|
394
|
+
unsigned char *value_to_print = NULL;
|
|
395
|
+
size_t length_to_print = 0;
|
|
396
|
+
struct flatten_agg_col_iterator it;
|
|
397
|
+
if (c->agg_method == flatten_agg_method_json) {
|
|
398
|
+
memfile_reset(data->memfile);
|
|
399
|
+
jsonwriter_start_array(data->jsw);
|
|
400
|
+
for (flatten_agg_col_iterator_init(c, &it); !flatten_agg_col_iterator_done(&it);
|
|
401
|
+
flatten_agg_col_iterator_next(&it)) {
|
|
402
|
+
// jsonwriter_str(data->jsw, it.str);
|
|
403
|
+
if (!it.str || !*it.str)
|
|
404
|
+
jsonwriter_null(data->jsw);
|
|
405
|
+
else
|
|
406
|
+
jsonwriter_unknown(data->jsw, it.str, strlen((const char *)it.str), 0);
|
|
407
|
+
}
|
|
408
|
+
jsonwriter_end_array(data->jsw);
|
|
409
|
+
jsonwriter_flush(data->jsw);
|
|
410
|
+
value_to_print = memfile_data(data->memfile);
|
|
411
|
+
length_to_print = (size_t)memfile_tell(data->memfile);
|
|
412
|
+
} else {
|
|
413
|
+
const unsigned char *delimiter = flatten_agg_col_delimiter(c);
|
|
414
|
+
if (!delimiter)
|
|
415
|
+
delimiter = (const unsigned char *)"";
|
|
416
|
+
size_t delimiter_len = strlen((const char *)delimiter);
|
|
417
|
+
const char replacement = (*delimiter == '_' ? '.' : '_');
|
|
418
|
+
|
|
419
|
+
// first, calc the length of joined string that we will need to create
|
|
420
|
+
size_t joined_len = 0;
|
|
421
|
+
|
|
422
|
+
int i = 0;
|
|
423
|
+
for (flatten_agg_col_iterator_init(c, &it); !flatten_agg_col_iterator_done(&it);
|
|
424
|
+
flatten_agg_col_iterator_next(&it), i++) {
|
|
425
|
+
if (i)
|
|
426
|
+
joined_len += delimiter_len;
|
|
427
|
+
it.str = flatten_replace_delim(it.str, delimiter, replacement);
|
|
428
|
+
flatten_agg_col_iterator_replace_str(&it, &it.str);
|
|
429
|
+
if (it.str && *it.str)
|
|
430
|
+
joined_len += strlen((char *)it.str);
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
if (joined_len && (value_to_print = malloc(joined_len))) {
|
|
434
|
+
unsigned char *cursor = value_to_print;
|
|
435
|
+
length_to_print = joined_len;
|
|
436
|
+
|
|
437
|
+
i = 0;
|
|
438
|
+
for (flatten_agg_col_iterator_init(c, &it); !flatten_agg_col_iterator_done(&it);
|
|
439
|
+
flatten_agg_col_iterator_next(&it), i++) {
|
|
440
|
+
// append delimiter
|
|
441
|
+
if (i) {
|
|
442
|
+
memcpy(cursor, delimiter, delimiter_len);
|
|
443
|
+
cursor += delimiter_len;
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
// append value
|
|
447
|
+
if (it.str && *it.str) {
|
|
448
|
+
size_t len = strlen((char *)it.str);
|
|
449
|
+
memcpy(cursor, it.str, len);
|
|
450
|
+
cursor += len;
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
zsv_writer_cell(data->csv_writer, 0, value_to_print, length_to_print, 1);
|
|
456
|
+
if (c->agg_method != flatten_agg_method_json)
|
|
457
|
+
free(value_to_print);
|
|
458
|
+
chars_lists_delete(&c->values);
|
|
459
|
+
c->last_value = NULL;
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
for (struct flatten_output_column *col = data->output_columns_by_value_head; col; col = col->next)
|
|
464
|
+
FREEIF(col->current_value);
|
|
465
|
+
FREEIF(data->last_asset_id);
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
static void flatten_row2(void *hook) {
|
|
469
|
+
struct flatten_data *data = hook;
|
|
470
|
+
if (data->row_count2 == 0) {
|
|
471
|
+
if (!data->row_id_column.ix_plus_1)
|
|
472
|
+
fprintf(stderr, "No ID column found\n");
|
|
473
|
+
if (data->current_column_index) {
|
|
474
|
+
// set up the agg column vector
|
|
475
|
+
data->agg_output_cols_vector_size = data->current_column_index;
|
|
476
|
+
data->agg_output_cols_vector = calloc(data->agg_output_cols_vector_size, sizeof(*data->agg_output_cols_vector));
|
|
477
|
+
for (struct flatten_agg_col *c = data->agg_output_cols; c; c = c->next) {
|
|
478
|
+
if (c->column.ix_plus_1)
|
|
479
|
+
data->agg_output_cols_vector[c->column.ix_plus_1 - 1] = c;
|
|
480
|
+
}
|
|
481
|
+
}
|
|
482
|
+
} else {
|
|
483
|
+
if (!data->current_asset_id && !data->last_asset_id)
|
|
484
|
+
fprintf(stderr, "Warning: disregarding row %i: no asset id\n", data->row_count2);
|
|
485
|
+
else {
|
|
486
|
+
if (data->last_asset_id && data->last_asset_id != data->current_asset_id) {
|
|
487
|
+
output_current_row(data);
|
|
488
|
+
data->last_asset_id = data->current_asset_id;
|
|
489
|
+
data->last_asset_id_len = data->current_asset_id_len;
|
|
490
|
+
}
|
|
491
|
+
if (data->current_column_name_column && data->current_column_name_value) {
|
|
492
|
+
if (data->current_column_name_column->current_value) {
|
|
493
|
+
fprintf(stderr, "Warning: multiple values for column %s, id %s: %s and %s\n",
|
|
494
|
+
data->current_column_name_column->name, data->last_asset_id,
|
|
495
|
+
data->current_column_name_column->current_value, data->current_column_name_value);
|
|
496
|
+
FREEIF(data->current_column_name_column->current_value);
|
|
497
|
+
}
|
|
498
|
+
data->current_column_name_column->current_value = data->current_column_name_value;
|
|
499
|
+
data->current_column_name_value = NULL;
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
data->current_column_name_column = NULL;
|
|
503
|
+
FREEIF(data->current_column_name_value);
|
|
504
|
+
}
|
|
505
|
+
data->current_column_index = 0;
|
|
506
|
+
data->row_count2++;
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
/*
 * Help text for the flatten command: a NULL-terminated array with one entry per
 * output line, printed by flatten_usage().
 *
 * NOTE(review): the text mentions the --default-agg option, but its entry in the
 * Options list below is commented out; confirm whether the option is supported.
 */
const char *flatten_usage_msg[] = {
  APPNAME ": flatten a table",
  " based on a single-column key, assuming that rows to flatten always",
  " appear in contiguous lines",
  "",
  "Usage: " APPNAME " [<filename>] [<options>] -- [aggregate_output_spec ...]",
  "",
  "Each aggregate output specification consists of the column name or index, followed",
  // "either (i) a single-column aggregation or (future: (ii) the \"*\" placeholder (in conjunction with -a)).",
  "by the equal sign (=) and then an aggregation method, except that",
  "no equal sign suffix is needed if the --default-agg option is specified.",
  "If a column name contains an equal sign, it must be escaped with a preceding backslash.",
  "",
  "Aggregation methods:",
  // " max",
  // " min",
  " json (json array)",
  " delim (pipe-delimited)",
  // " arrayjs (json)",
  " delim_<delim> (user-specified delimiter)",
  // " unique (pipe-delimited)",
  // " uniquejs (json)",
  // " unique_<delim> (user-specified delimiter)",
  "",
  "Options:",
  " -b : output with BOM",
  " -v,--verbose : display verbose messages",
  " -C <max_columns_to_output> : maximum number of columns to output",
  " -m <max_rows_per_aggregation> : maximum number of rows (default: 1024)",
  " --row-id <column_name> : column name to group by",
  " --col-name <column_name> : column name specifying the output column name",
  " -V <column_name> : column name specifying the output value",
  // " --default-agg <method> : default aggregation method to use, if none specified",
  " -o <filename> : filename to save output to",
  NULL, // sentinel: marks the end of the help text
};
|
|
545
|
+
|
|
546
|
+
/*
|
|
547
|
+
EXAMPLE
|
|
548
|
+
echo 'row,col,val
|
|
549
|
+
> A,ltv,100
|
|
550
|
+
> A,loanid,A
|
|
551
|
+
> A,hi,there
|
|
552
|
+
> B,loanid,B
|
|
553
|
+
> B,ltv,90
|
|
554
|
+
> B,hi,you
|
|
555
|
+
> B,xxx,zzz' | zsv flatten --row-id row --col-name col -V val
|
|
556
|
+
row,ltv,loanid,hi,xxx
|
|
557
|
+
A,100,A,there,
|
|
558
|
+
B,90,B,you,zzz
|
|
559
|
+
*/
|
|
560
|
+
|
|
561
|
+
static void flatten_usage(void) {
|
|
562
|
+
for (size_t i = 0; flatten_usage_msg[i]; i++)
|
|
563
|
+
fprintf(stdout, "%s\n", flatten_usage_msg[i]);
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
void flatten_agg_cols_delete(struct flatten_agg_col **p) {
|
|
567
|
+
if (p && *p) {
|
|
568
|
+
struct flatten_agg_col *next;
|
|
569
|
+
for (struct flatten_agg_col *e = *p; e; e = next) {
|
|
570
|
+
next = e->next;
|
|
571
|
+
FREEIF(e->column.name);
|
|
572
|
+
chars_lists_delete(&e->values);
|
|
573
|
+
free(e);
|
|
574
|
+
}
|
|
575
|
+
*p = NULL;
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
static struct flatten_agg_col *flatten_agg_col_new(const char *arg, int *err) {
|
|
580
|
+
struct flatten_agg_col *e = calloc(1, sizeof(*e));
|
|
581
|
+
if ((e->column.name = (unsigned char *)strdup(arg))) {
|
|
582
|
+
e->column.name_len = strlen(arg);
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
unsigned char *write = e->column.name;
|
|
586
|
+
unsigned char *write_end = e->column.name + e->column.name_len;
|
|
587
|
+
unsigned char *read = e->column.name;
|
|
588
|
+
|
|
589
|
+
unsigned char *agg_method_s = NULL;
|
|
590
|
+
|
|
591
|
+
while (read && *read) {
|
|
592
|
+
if (*read == '=') { // end of name!
|
|
593
|
+
*read = '\0';
|
|
594
|
+
agg_method_s = read + 1;
|
|
595
|
+
e->column.name_len = read - e->column.name;
|
|
596
|
+
break;
|
|
597
|
+
} else if (*read == '\\') {
|
|
598
|
+
read++;
|
|
599
|
+
if (!*read)
|
|
600
|
+
break;
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
*write = *read;
|
|
604
|
+
write++;
|
|
605
|
+
read++;
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
if (agg_method_s) {
|
|
609
|
+
// for backward-compatibility, "array" or "array_" are treated the same as "delim" or "delim_"
|
|
610
|
+
if (!strcmp((const char *)agg_method_s, "array") || !strcmp((const char *)agg_method_s, "delim"))
|
|
611
|
+
e->agg_method = flatten_agg_method_delim;
|
|
612
|
+
else if (!strcmp((const char *)agg_method_s, "json"))
|
|
613
|
+
e->agg_method = flatten_agg_method_json;
|
|
614
|
+
else if ((!strncmp((const char *)agg_method_s, "array_", strlen("array_")) &&
|
|
615
|
+
strlen((const char *)agg_method_s) > strlen("array_"))) {
|
|
616
|
+
e->agg_method = flatten_agg_method_delim;
|
|
617
|
+
e->delimiter = agg_method_s + strlen("array_");
|
|
618
|
+
} else if ((!strncmp((const char *)agg_method_s, "delim_", strlen("delim_")) &&
|
|
619
|
+
strlen((const char *)agg_method_s) > strlen("delim_"))) {
|
|
620
|
+
e->agg_method = flatten_agg_method_delim;
|
|
621
|
+
e->delimiter = agg_method_s + strlen("delim_");
|
|
622
|
+
} else
|
|
623
|
+
*err =
|
|
624
|
+
zsv_printerr(1, "Unrecognized aggregation method (expected json, delim or delim_<delim>): %s", agg_method_s);
|
|
625
|
+
} else {
|
|
626
|
+
*err = zsv_printerr(1, "No aggregation method specified for %s", arg);
|
|
627
|
+
while (write < write_end) {
|
|
628
|
+
*write = '\0';
|
|
629
|
+
write++;
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
if (!e->agg_method) {
|
|
633
|
+
*err = 1;
|
|
634
|
+
flatten_agg_cols_delete(&e);
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
return e;
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
static void flatten_cleanup(struct flatten_data *data) {
|
|
641
|
+
flatten_agg_cols_delete(&data->agg_output_cols);
|
|
642
|
+
|
|
643
|
+
if (data->in && data->in != stdin)
|
|
644
|
+
fclose(data->in);
|
|
645
|
+
|
|
646
|
+
struct flatten_column_name_and_ix *cnxlist[] = {&data->row_id_column, &data->column_name_column, &data->value_column};
|
|
647
|
+
for (int i = 0; i < 3; i++) {
|
|
648
|
+
struct flatten_column_name_and_ix *cnx = cnxlist[i];
|
|
649
|
+
if (cnx->free_name)
|
|
650
|
+
free(cnx->name);
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
for (struct flatten_output_column *next, *e = data->output_columns_by_value_head; e; e = next) {
|
|
654
|
+
next = e->next;
|
|
655
|
+
flatten_output_column_free(e);
|
|
656
|
+
free(e);
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
FREEIF(data->agg_output_cols_vector);
|
|
660
|
+
zsv_writer_delete(data->csv_writer);
|
|
661
|
+
if (data->out && data->out != stdout)
|
|
662
|
+
fclose(data->out);
|
|
663
|
+
|
|
664
|
+
if (data->jsw)
|
|
665
|
+
jsonwriter_delete(data->jsw);
|
|
666
|
+
if (data->memfile)
|
|
667
|
+
memfile_close(data->memfile);
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *optsp,
|
|
671
|
+
struct zsv_prop_handler *custom_prop_handler) {
|
|
672
|
+
if (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
|
|
673
|
+
flatten_usage();
|
|
674
|
+
return 0;
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
struct zsv_opts opts = *optsp;
|
|
678
|
+
struct flatten_data data = {0};
|
|
679
|
+
struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
|
|
680
|
+
|
|
681
|
+
data.output_columns_by_value_tail = &data.output_columns_by_value_head;
|
|
682
|
+
data.max_rows_per_aggregation = 1024;
|
|
683
|
+
data.max_cols = 1024;
|
|
684
|
+
|
|
685
|
+
int err = 0;
|
|
686
|
+
int agg_arg_i = 0;
|
|
687
|
+
|
|
688
|
+
for (int arg_i = 1; arg_i < argc; arg_i++) {
|
|
689
|
+
if (!strcmp(argv[arg_i], "--")) {
|
|
690
|
+
agg_arg_i = arg_i + 1;
|
|
691
|
+
break;
|
|
692
|
+
} else if (!strcmp(argv[arg_i], "-b"))
|
|
693
|
+
writer_opts.with_bom = 1;
|
|
694
|
+
else if (!strcmp(argv[arg_i], "-C")) {
|
|
695
|
+
if (!(arg_i + 1 < argc && atoi(argv[arg_i + 1]) > 9))
|
|
696
|
+
err = zsv_printerr(1, "%s invalid: should be positive integer > 9 (got %s)", argv[arg_i], argv[arg_i + 1]);
|
|
697
|
+
else
|
|
698
|
+
data.max_cols = atoi(argv[++arg_i]);
|
|
699
|
+
} else if (!strcmp(argv[arg_i], "-m")) {
|
|
700
|
+
if (!(arg_i + 1 < argc && atoi(argv[arg_i + 1]) > 1))
|
|
701
|
+
err = zsv_printerr(1, "%s invalid: should be positive integer > 1 (got %s)", argv[arg_i], argv[arg_i + 1]);
|
|
702
|
+
else
|
|
703
|
+
data.max_rows_per_aggregation = atoi(argv[++arg_i]);
|
|
704
|
+
} else if (!strcmp(argv[arg_i], "--row-id")) { // used to be -i
|
|
705
|
+
if (!(arg_i + 1 < argc && *argv[arg_i + 1]))
|
|
706
|
+
err = zsv_printerr(1, "%s option: missing column name", argv[arg_i]);
|
|
707
|
+
else {
|
|
708
|
+
data.row_id_column.name = (unsigned char *)argv[++arg_i];
|
|
709
|
+
data.row_id_column.name_len = strlen((char *)data.row_id_column.name);
|
|
710
|
+
}
|
|
711
|
+
} else if (!strcmp(argv[arg_i], "--col-name")) { // used to be -c
|
|
712
|
+
if (!(arg_i + 1 < argc && *argv[arg_i + 1]))
|
|
713
|
+
err = zsv_printerr(1, "%s option: missing column name", argv[arg_i]);
|
|
714
|
+
else {
|
|
715
|
+
data.column_name_column.name = (unsigned char *)argv[++arg_i];
|
|
716
|
+
data.column_name_column.name_len = strlen((char *)data.column_name_column.name);
|
|
717
|
+
}
|
|
718
|
+
} else if (!strcmp(argv[arg_i], "-V")) {
|
|
719
|
+
if (!(arg_i + 1 < argc))
|
|
720
|
+
err = zsv_printerr(1, "-V option: missing column name");
|
|
721
|
+
else {
|
|
722
|
+
data.value_column.name = (unsigned char *)argv[++arg_i];
|
|
723
|
+
data.value_column.name_len = strlen((char *)data.value_column.name);
|
|
724
|
+
}
|
|
725
|
+
} else if (!strcmp(argv[arg_i], "-o")) {
|
|
726
|
+
if (!(arg_i + 1 < argc))
|
|
727
|
+
err = zsv_printerr(1, "-o option: missing filename");
|
|
728
|
+
else if (*argv[arg_i + 1] == '-')
|
|
729
|
+
err = zsv_printerr(1, "-o option: filename may not start with '-' (got %s)", argv[arg_i + 1]);
|
|
730
|
+
else
|
|
731
|
+
data.output_filename = argv[++arg_i];
|
|
732
|
+
} else if (data.in)
|
|
733
|
+
err = zsv_printerr(1, "Input file was specified, cannot also read: %s", argv[arg_i]);
|
|
734
|
+
else if (!(data.in = fopen(argv[arg_i], "rb")))
|
|
735
|
+
err = zsv_printerr(1, "Could not open for reading: %s", argv[arg_i]);
|
|
736
|
+
else
|
|
737
|
+
data.input_path = argv[arg_i];
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
if (!data.in) {
|
|
741
|
+
#ifdef NO_STDIN
|
|
742
|
+
err = zsv_printerr(1, "Please specify an input file");
|
|
743
|
+
#else
|
|
744
|
+
data.in = stdin;
|
|
745
|
+
#endif
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
if (err) {
|
|
749
|
+
flatten_cleanup(&data);
|
|
750
|
+
return 1;
|
|
751
|
+
}
|
|
752
|
+
|
|
753
|
+
if (agg_arg_i && agg_arg_i < argc) {
|
|
754
|
+
struct flatten_agg_col **nextp = &data.agg_output_cols;
|
|
755
|
+
for (int arg_i = 0; !err && arg_i + agg_arg_i < argc; arg_i++) {
|
|
756
|
+
const char *arg = argv[arg_i + agg_arg_i];
|
|
757
|
+
struct flatten_agg_col *cs = flatten_agg_col_new(arg, &err);
|
|
758
|
+
if (cs) {
|
|
759
|
+
data.have_agg = 1;
|
|
760
|
+
*nextp = cs;
|
|
761
|
+
nextp = &cs->next;
|
|
762
|
+
|
|
763
|
+
if (cs->agg_method == flatten_agg_method_json) {
|
|
764
|
+
if (!data.memfile) {
|
|
765
|
+
data.memfile = memfile_open(1024);
|
|
766
|
+
data.jsw = jsonwriter_new_stream(memfile_write, data.memfile);
|
|
767
|
+
if (!data.memfile || !data.jsw) {
|
|
768
|
+
fprintf(stderr, "Unable to allocate memfile and/or jsonwriter\n");
|
|
769
|
+
flatten_cleanup(&data);
|
|
770
|
+
return 1;
|
|
771
|
+
}
|
|
772
|
+
jsonwriter_set_option(data.jsw, jsonwriter_option_compact);
|
|
773
|
+
}
|
|
774
|
+
}
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
if (!(data.out = writer_opts.stream = data.output_filename ? fopen(data.output_filename, "wb") : stdout))
|
|
780
|
+
err = zsv_printerr(1, "Unable to open %s for writing", data.output_filename);
|
|
781
|
+
|
|
782
|
+
int passes = data.column_name_column.name || !data.have_agg ? 2 : 1;
|
|
783
|
+
const char *input_path = NULL;
|
|
784
|
+
FILE *in = NULL;
|
|
785
|
+
char *tmp_fn = NULL;
|
|
786
|
+
zsv_handle_ctrl_c_signal();
|
|
787
|
+
if (passes == 1)
|
|
788
|
+
in = data.in;
|
|
789
|
+
else {
|
|
790
|
+
tmp_fn = zsv_get_temp_filename("zsv_flatten_XXXXXXXX");
|
|
791
|
+
if (tmp_fn) {
|
|
792
|
+
FILE *tmp_f = fopen(tmp_fn, "w+b");
|
|
793
|
+
opts.cell_handler = flatten_cell1;
|
|
794
|
+
opts.row_handler = flatten_row1;
|
|
795
|
+
opts.stream = data.in;
|
|
796
|
+
input_path = data.input_path;
|
|
797
|
+
opts.ctx = &data;
|
|
798
|
+
|
|
799
|
+
zsv_parser handle;
|
|
800
|
+
if (zsv_new_with_properties(&opts, custom_prop_handler, input_path, &handle) != zsv_status_ok)
|
|
801
|
+
err = data.cancelled = zsv_printerr(1, "Unable to create csv parser");
|
|
802
|
+
else {
|
|
803
|
+
zsv_set_scan_filter(handle, zsv_filter_write, tmp_f);
|
|
804
|
+
enum zsv_status status;
|
|
805
|
+
while (!data.cancelled && !zsv_signal_interrupted && (status = zsv_parse_more(handle)) == zsv_status_ok)
|
|
806
|
+
;
|
|
807
|
+
zsv_finish(handle);
|
|
808
|
+
zsv_delete(handle);
|
|
809
|
+
fflush(tmp_f);
|
|
810
|
+
rewind(tmp_f);
|
|
811
|
+
}
|
|
812
|
+
in = tmp_f;
|
|
813
|
+
}
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
if (!err) {
|
|
817
|
+
struct zsv_opts opts2 = {0};
|
|
818
|
+
opts2.cell_handler = flatten_cell2;
|
|
819
|
+
opts2.row_handler = flatten_row2;
|
|
820
|
+
opts2.ctx = &data;
|
|
821
|
+
data.current_column_index = 0;
|
|
822
|
+
|
|
823
|
+
if (!(data.csv_writer = zsv_writer_new(&writer_opts)))
|
|
824
|
+
err = data.cancelled = zsv_printerr(1, "Unable to create csv writer");
|
|
825
|
+
|
|
826
|
+
flatten_output_header(&data);
|
|
827
|
+
|
|
828
|
+
opts2.stream = in;
|
|
829
|
+
zsv_parser parser = zsv_new(&opts2);
|
|
830
|
+
if (!parser)
|
|
831
|
+
err = data.cancelled = zsv_printerr(1, "Unable to create csv parser");
|
|
832
|
+
|
|
833
|
+
enum zsv_status status;
|
|
834
|
+
while (!data.cancelled && !zsv_signal_interrupted && (status = zsv_parse_more(parser)) == zsv_status_ok)
|
|
835
|
+
;
|
|
836
|
+
zsv_finish(parser);
|
|
837
|
+
zsv_delete(parser);
|
|
838
|
+
output_current_row(&data);
|
|
839
|
+
}
|
|
840
|
+
flatten_cleanup(&data);
|
|
841
|
+
|
|
842
|
+
if (in && in != stdin)
|
|
843
|
+
fclose(in);
|
|
844
|
+
|
|
845
|
+
if (tmp_fn) {
|
|
846
|
+
unlink(tmp_fn);
|
|
847
|
+
free(tmp_fn);
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
return err;
|
|
851
|
+
}
|