zsv 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +36 -0
  3. data/LICENSE +21 -0
  4. data/README.md +311 -0
  5. data/ext/zsv/common.h +34 -0
  6. data/ext/zsv/extconf.rb +137 -0
  7. data/ext/zsv/options.c +126 -0
  8. data/ext/zsv/options.h +31 -0
  9. data/ext/zsv/options_internal.h +8 -0
  10. data/ext/zsv/parser.c +300 -0
  11. data/ext/zsv/parser.h +62 -0
  12. data/ext/zsv/row.c +122 -0
  13. data/ext/zsv/row.h +39 -0
  14. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
  15. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
  16. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
  17. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
  18. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
  19. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
  20. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
  21. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
  22. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
  23. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
  24. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
  25. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
  26. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
  27. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
  28. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
  29. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
  30. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
  31. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
  32. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
  33. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
  34. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
  35. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
  36. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
  37. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
  38. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
  39. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
  40. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
  41. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
  42. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
  43. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
  84. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
  85. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
  86. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
  87. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
  88. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
  89. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
  90. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
  91. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
  92. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
  93. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
  94. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
  95. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
  96. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
  97. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
  98. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
  99. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
  100. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
  106. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
  107. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
  108. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
  109. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
  110. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
  111. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
  112. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
  113. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
  141. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
  142. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
  143. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
  144. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
  145. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
  146. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
  147. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
  148. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
  182. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
  183. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
  184. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
  185. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
  186. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
  187. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
  188. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
  189. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
  190. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
  191. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
  192. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
  193. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
  194. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
  195. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
  196. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
  197. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
  224. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
  225. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
  226. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
  227. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
  228. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
  229. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
  230. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
  231. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
  232. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
  233. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
  234. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
  235. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
  236. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
  237. data/ext/zsv/zsv_ext.c +343 -0
  238. data/lib/zsv/version.rb +5 -0
  239. data/lib/zsv.rb +81 -0
  240. metadata +340 -0
@@ -0,0 +1,913 @@
1
+ /*
2
+ * Copyright (C) 2023 Liquidaty and the zsv/lib contributors
3
+ * All rights reserved
4
+ *
5
+ * This file is part of zsv/lib, distributed under the license defined at
6
+ * https://opensource.org/licenses/MIT
7
+ */
8
+
9
+ #include <stdio.h>
10
+ #include <string.h>
11
+ #include <stdlib.h>
12
+ #include <math.h>
13
+ #include <limits.h>
14
+
15
+ #include <jsonwriter.h>
16
+
17
+ #include <sqlite3.h>
18
+ extern sqlite3_module CsvModule;
19
+
20
+ #include <zsv/utils/string.h>
21
+ #include <zsv/utils/writer.h>
22
+
23
+ #define ZSV_COMMAND compare
24
+ #include "zsv_command.h"
25
+
26
+ #include "compare.h"
27
+ #include "compare_internal.h"
28
+
29
+ #include "compare_unique_colname.c"
30
+ #include "compare_added_column.c"
31
+ #include "compare_sort.c"
32
+
33
+ #define ZSV_COMPARE_OUTPUT_TYPE_JSON 'j'
34
+
35
+ static struct zsv_compare_key **zsv_compare_key_add(struct zsv_compare_key **next, const char *s, int *err) {
36
+ struct zsv_compare_key *k = calloc(1, sizeof(*k));
37
+ if (!k)
38
+ *err = 1;
39
+ else {
40
+ k->name = s;
41
+ *next = k;
42
+ next = &k->next;
43
+ }
44
+ return next;
45
+ }
46
+
47
+ static void zsv_compare_output_property_name(struct zsv_compare_data *data, int new_row, char skip) {
48
+ if (new_row)
49
+ data->writer.cell_ix = 0;
50
+ else
51
+ data->writer.cell_ix++;
52
+ if (!skip) {
53
+ if (data->writer.cell_ix < data->writer.properties.used)
54
+ jsonwriter_object_key(data->writer.handle.jsw, data->writer.properties.names[data->writer.cell_ix]);
55
+ else
56
+ jsonwriter_object_key(data->writer.handle.jsw, "Error missing key!");
57
+ }
58
+ }
59
+
60
+ static void zsv_compare_output_strn(struct zsv_compare_data *data, const unsigned char *s, size_t len, int new_row,
61
+ int quoted) {
62
+ if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) {
63
+ if (data->writer.object && s == NULL) {
64
+ zsv_compare_output_property_name(data, new_row, 1);
65
+ return;
66
+ }
67
+ if (data->writer.object)
68
+ zsv_compare_output_property_name(data, new_row, 0);
69
+ if (s == NULL)
70
+ jsonwriter_null(data->writer.handle.jsw);
71
+ else
72
+ jsonwriter_strn(data->writer.handle.jsw, s, len);
73
+ } else {
74
+ if (s == NULL)
75
+ zsv_writer_cell_blank(data->writer.handle.csv, ZSV_WRITER_SAME_ROW);
76
+ else
77
+ zsv_writer_cell(data->writer.handle.csv, new_row, s, len, quoted);
78
+ }
79
+ }
80
+
81
/*
 * Write a NUL-terminated cell value. A NULL `s` is forwarded with length 0;
 * see zsv_compare_output_strn() for how NULL values are handled.
 */
static void zsv_compare_output_str(struct zsv_compare_data *data, const unsigned char *s, int new_row, int quoted) {
  size_t len = 0;
  if (s != NULL)
    len = strlen((const char *)s);
  zsv_compare_output_strn(data, s, len, new_row, quoted);
}
84
+
85
+ static void zsv_compare_output_zu(struct zsv_compare_data *data, size_t n, int new_row) {
86
+ if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) {
87
+ if (data->writer.object)
88
+ zsv_compare_output_property_name(data, new_row, 0);
89
+ jsonwriter_int(data->writer.handle.jsw, n);
90
+ } else
91
+ zsv_writer_cell_zu(data->writer.handle.csv, ZSV_WRITER_NEW_ROW, data->row_count);
92
+ }
93
+
94
+ static void zsv_compare_header_str(struct zsv_compare_data *data, const unsigned char *s, int new_row, int quoted) {
95
+ if (!(data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON && data->writer.object))
96
+ zsv_compare_output_str(data, s, new_row, quoted);
97
+ else {
98
+ // we will output as JSON objects, so save the property names for later use
99
+ if (data->writer.properties.used + 1 < data->writer.properties.allocated)
100
+ data->writer.properties.names[data->writer.properties.used++] = strdup(s ? (const char *)s : "");
101
+ else
102
+ fprintf(stderr, "zsv_compare_header_str: insufficient header names allocation adding %s!\n", s);
103
+ }
104
+ }
105
+
106
+ static void zsv_compare_allocate_properties(struct zsv_compare_data *data, unsigned count) {
107
+ if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON && data->writer.object && count > 0) {
108
+ if ((data->writer.properties.names = malloc(count * sizeof(*data->writer.properties.names))))
109
+ data->writer.properties.allocated = count;
110
+ }
111
+ }
112
+
113
+ static void zsv_compare_json_row_start(struct zsv_compare_data *data) {
114
+ if (data->writer.object)
115
+ jsonwriter_start_object(data->writer.handle.jsw);
116
+ else
117
+ jsonwriter_start_array(data->writer.handle.jsw);
118
+ }
119
+
120
+ static void zsv_compare_json_row_end(struct zsv_compare_data *data) {
121
+ if (data->writer.object)
122
+ jsonwriter_end_object(data->writer.handle.jsw);
123
+ else
124
+ jsonwriter_end_array(data->writer.handle.jsw);
125
+ }
126
+
127
+ static void zsv_compare_output_tuple(struct zsv_compare_data *data, struct zsv_compare_input *key_input,
128
+ const unsigned char *colname,
129
+ struct zsv_cell *values, // in original input order
130
+ char is_key) {
131
+ // print ID | Column | Value 1 | ... | Value N
132
+ if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON)
133
+ zsv_compare_json_row_start(data);
134
+
135
+ // TO DO: output ID values
136
+ if (!data->keys) // id is effectively just row number
137
+ zsv_compare_output_zu(data, data->row_count, ZSV_WRITER_NEW_ROW);
138
+ else {
139
+ for (unsigned idx = 0; idx < key_input->key_count; idx++) {
140
+ struct zsv_cell *c = &key_input->keys[idx].value;
141
+ zsv_compare_output_strn(data, c->str, c->len, idx == 0 ? ZSV_WRITER_NEW_ROW : ZSV_WRITER_SAME_ROW, c->quoted);
142
+ }
143
+ }
144
+
145
+ // output additional columns
146
+ for (struct zsv_compare_added_column *ac = data->added_columns; ac; ac = ac->next) {
147
+ if (!ac->input) {
148
+ if (data->writer.type != ZSV_COMPARE_OUTPUT_TYPE_JSON)
149
+ zsv_compare_output_str(data, NULL, ZSV_WRITER_SAME_ROW, 0);
150
+ } else {
151
+ struct zsv_cell c = data->get_cell(ac->input, ac->col_ix);
152
+ zsv_compare_output_strn(data, c.str, c.len, ZSV_WRITER_SAME_ROW, c.quoted);
153
+ }
154
+ }
155
+
156
+ // output column name of this cell
157
+ zsv_compare_output_str(data, colname, ZSV_WRITER_SAME_ROW, 1);
158
+
159
+ for (unsigned i = 0; i < data->input_count; i++) {
160
+ struct zsv_compare_input *input = &data->inputs[i];
161
+ if ((input->done || !input->row_loaded) && !is_key) { // no data for this input
162
+ zsv_compare_output_str(data, NULL, ZSV_WRITER_SAME_ROW, 0);
163
+ } else {
164
+ struct zsv_cell *value = &values[i];
165
+ zsv_compare_output_strn(data, value->str, value->len, ZSV_WRITER_SAME_ROW, value->quoted);
166
+ }
167
+ }
168
+
169
+ if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON)
170
+ zsv_compare_json_row_end(data);
171
+ }
172
+
173
+ static const unsigned char *zsv_compare_combined_key_names(struct zsv_compare_data *data) {
174
+ if (!data->combined_key_names) {
175
+ size_t len = 2;
176
+
177
+ for (unsigned key_ix = 0; key_ix < data->key_count; key_ix++) {
178
+ struct zsv_compare_key *key = &data->keys[key_ix];
179
+ if (key && key->name)
180
+ len += strlen(key->name) + 1;
181
+ }
182
+ if ((data->combined_key_names = calloc(1, len))) {
183
+ unsigned char *start = NULL;
184
+ for (unsigned key_ix = 0; key_ix < data->key_count; key_ix++) {
185
+ struct zsv_compare_key *key = &data->keys[key_ix];
186
+ if (key && key->name) {
187
+ if (start) {
188
+ *start = (unsigned char)'|';
189
+ start++;
190
+ } else
191
+ start = data->combined_key_names;
192
+ strcpy((char *)start, key->name);
193
+ start += strlen((char *)start);
194
+ }
195
+ }
196
+ }
197
+ }
198
+ return data->combined_key_names;
199
+ }
200
+
201
+ static void zsv_compare_print_row(struct zsv_compare_data *data,
202
+ const unsigned last_ix // last input ix in inputs_to_sort
203
+ ) {
204
+ struct zsv_compare_input *key_input = data->inputs_to_sort[0];
205
+
206
+ // for now, output format is simple: for each value,
207
+ // output a single scalar if the values are the same,
208
+ // and a tuple if they differ
209
+ struct zsv_cell *values = calloc(data->input_count, sizeof(*values));
210
+ if (!values) {
211
+ data->status = zsv_compare_status_memory;
212
+ return;
213
+ }
214
+
215
+ #define ZSV_COMPARE_MISSING "Missing"
216
+
217
+ // if we don't have data from every input, then output "Missing" for missing inputs
218
+ char got_missing = 0;
219
+ for (unsigned i = 0; i < data->input_count; i++) {
220
+ struct zsv_compare_input *input = data->inputs_to_sort[i];
221
+ if (i > last_ix) {
222
+ got_missing = 1;
223
+ unsigned input_ix = input->index;
224
+ values[input_ix].str = (unsigned char *)ZSV_COMPARE_MISSING;
225
+ values[input_ix].len = strlen(ZSV_COMPARE_MISSING);
226
+ }
227
+ }
228
+ if (got_missing) {
229
+ const unsigned char *key_names =
230
+ data->print_key_col_names ? zsv_compare_combined_key_names(data) : (const unsigned char *)"<key>";
231
+ zsv_compare_output_tuple(data, key_input, key_names, values, 1);
232
+ // reset values
233
+ memset(values, 0, data->input_count * sizeof(*values));
234
+ }
235
+
236
+ // for each output column
237
+ zsv_compare_unique_colname *output_col = data->output_colnames_first;
238
+ for (unsigned output_ix = 0; output_ix < data->output_colcount && output_col != NULL;
239
+ output_ix++, output_col = output_col->next) {
240
+ if (output_col->is_key)
241
+ continue;
242
+
243
+ char different = 0;
244
+ unsigned first_input_ix = 0;
245
+ for (unsigned i = 0; i <= last_ix; i++) {
246
+ struct zsv_compare_input *input = data->inputs_to_sort[i];
247
+ if (input->done || !input->row_loaded)
248
+ continue;
249
+
250
+ unsigned input_ix = input->index;
251
+ if (i == 0)
252
+ first_input_ix = input_ix;
253
+
254
+ unsigned col_ix_plus_1 = input->out2in[output_ix];
255
+ if (col_ix_plus_1 == 0)
256
+ values[input_ix].len = 0;
257
+ else {
258
+ unsigned input_col_ix = col_ix_plus_1 - 1;
259
+ if (!output_col)
260
+ output_col = input->output_colnames[input_col_ix];
261
+ values[input_ix] = data->get_cell(input, input_col_ix);
262
+ if (i > 0 && !different &&
263
+ data->cmp(data->cmp_ctx, values[first_input_ix], values[input_ix], data, input_col_ix)) {
264
+ different = 1;
265
+ if (data->tolerance.value && values[first_input_ix].len < ZSV_COMPARE_MAX_NUMBER_BUFF_LEN &&
266
+ values[input_ix].len < ZSV_COMPARE_MAX_NUMBER_BUFF_LEN) {
267
+ // check if both are numbers with a difference less than the given tolerance
268
+ double d1, d2;
269
+ memcpy(data->tolerance.str1, values[first_input_ix].str, values[first_input_ix].len);
270
+ data->tolerance.str1[values[first_input_ix].len] = '\0';
271
+ memcpy(data->tolerance.str2, values[input_ix].str, values[input_ix].len);
272
+ data->tolerance.str2[values[input_ix].len] = '\0';
273
+ if (!zsv_strtod_exact(data->tolerance.str1, &d1) && !zsv_strtod_exact(data->tolerance.str2, &d2) &&
274
+ fabs(d1 - d2) < data->tolerance.value)
275
+ different = 0;
276
+ }
277
+ }
278
+ }
279
+ }
280
+
281
+ if (different) {
282
+ zsv_compare_output_tuple(data, key_input, output_col->name, values, 0);
283
+ if (data->diff_count < INT_MAX)
284
+ data->diff_count++;
285
+ }
286
+ }
287
+ free(values);
288
+ }
289
+
290
+ static void zsv_compare_input_free(struct zsv_compare_input *input) {
291
+ zsv_delete(input->parser);
292
+ zsv_compare_unique_colnames_delete(&input->colnames);
293
+ if (input->added)
294
+ sqlite3_zsv_list_remove(input->path);
295
+ free(input->out2in);
296
+ if (input->stream)
297
+ fclose(input->stream);
298
+ free(input->output_colnames);
299
+ free(input->keys);
300
+ if (input->sort_stmt) {
301
+ sqlite3_finalize(input->sort_stmt);
302
+ }
303
+ }
304
+
305
+ static enum zsv_compare_status zsv_compare_set_inputs(struct zsv_compare_data *data, unsigned input_count) {
306
+ if (!input_count || !(data->inputs = calloc(input_count, sizeof(*data->inputs))) ||
307
+ !(data->inputs_to_sort = calloc(input_count, sizeof(*data->inputs_to_sort))))
308
+ return zsv_compare_status_memory;
309
+ data->input_count = input_count;
310
+ for (unsigned i = 0; i < input_count; i++) {
311
+ struct zsv_compare_input *input = &data->inputs[i];
312
+ input->index = i;
313
+ data->inputs_to_sort[i] = input;
314
+ if (data->key_count) {
315
+ if (!(input->keys = calloc(data->key_count, sizeof(*input->keys))))
316
+ return zsv_compare_status_memory;
317
+
318
+ input->key_count = data->key_count;
319
+ unsigned j = 0;
320
+ for (struct zsv_compare_key *key = data->keys; key; key = key->next)
321
+ input->keys[j++].key = key;
322
+ }
323
+ }
324
+ return zsv_compare_status_ok;
325
+ }
326
+
327
// forward declaration: cell comparison function installed by zsv_compare_new()
static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2, void *data,
                            unsigned col_ix);
328
+
329
+ static void zsv_compare_output_begin(struct zsv_compare_data *data) {
330
+ if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) {
331
+ if (!(data->writer.handle.jsw = jsonwriter_new(stdout))) // to do: data->out
332
+ data->status = zsv_compare_status_memory;
333
+ else {
334
+ if (data->writer.compact)
335
+ jsonwriter_set_option(data->writer.handle.jsw, jsonwriter_option_compact);
336
+ jsonwriter_start_array(data->writer.handle.jsw);
337
+ }
338
+ } else {
339
+ if (!(data->writer.handle.csv = zsv_writer_new(NULL)))
340
+ data->status = zsv_compare_status_memory;
341
+ }
342
+
343
+ if (data->status == zsv_compare_status_ok) {
344
+ unsigned header_col_count = (data->key_count ? data->key_count : 1) + // match keys
345
+ 2 + // column name and column value
346
+ data->input_count + // input names
347
+ data->added_colcount; // added columns
348
+
349
+ zsv_compare_allocate_properties(data, header_col_count);
350
+
351
+ // write header row
352
+ if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON && !data->writer.object)
353
+ jsonwriter_start_array(data->writer.handle.jsw);
354
+
355
+ // write keys
356
+ if (!data->keys) // id is effectively just row number
357
+ zsv_compare_header_str(data, (const unsigned char *)"Row #", ZSV_WRITER_NEW_ROW, 0);
358
+ else {
359
+ for (struct zsv_compare_key *key_name = data->keys; key_name; key_name = key_name->next)
360
+ zsv_compare_header_str(data, (const unsigned char *)key_name->name,
361
+ key_name == data->keys ? ZSV_WRITER_NEW_ROW : ZSV_WRITER_SAME_ROW, 1);
362
+ }
363
+
364
+ // write additional column names
365
+ for (struct zsv_compare_added_column *ac = data->added_columns; ac; ac = ac->next)
366
+ zsv_compare_header_str(data, ac->colname->name, ZSV_WRITER_SAME_ROW, 1);
367
+
368
+ // write "Column"
369
+ zsv_compare_header_str(data, (const unsigned char *)"Column", ZSV_WRITER_SAME_ROW, 0);
370
+
371
+ // write input name(s)
372
+ for (unsigned i = 0; i < data->input_count; i++) {
373
+ struct zsv_compare_input *input = &data->inputs[i];
374
+ zsv_compare_header_str(data, (const unsigned char *)input->path, ZSV_WRITER_SAME_ROW, 1);
375
+ }
376
+
377
+ if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON && !data->writer.object)
378
+ jsonwriter_end_array(data->writer.handle.jsw);
379
+ }
380
+ }
381
+
382
+ static void zsv_compare_output_end(struct zsv_compare_data *data) {
383
+ if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) {
384
+ if (data->writer.handle.jsw)
385
+ jsonwriter_end(data->writer.handle.jsw);
386
+ } else {
387
+ zsv_writer_flush(data->writer.handle.csv);
388
+ }
389
+ if (data->status == zsv_compare_status_no_more_input)
390
+ data->status = zsv_compare_status_ok;
391
+ }
392
+
393
+ static enum zsv_status zsv_compare_next_unsorted_row(struct zsv_compare_input *input) {
394
+ return zsv_next_row(input->parser);
395
+ }
396
+
397
+ static struct zsv_cell zsv_compare_get_unsorted_cell(struct zsv_compare_input *input, unsigned ix) {
398
+ return zsv_get_cell_trimmed(input->parser, ix);
399
+ }
400
+
401
+ static unsigned zsv_compare_get_unsorted_colcount(struct zsv_compare_input *input) {
402
+ return zsv_cell_count(input->parser);
403
+ }
404
+
405
+ static enum zsv_compare_status input_init_unsorted(struct zsv_compare_data *data, struct zsv_compare_input *input,
406
+ struct zsv_opts *opts,
407
+ struct zsv_prop_handler *custom_prop_handler) {
408
+ if (!(input->stream = fopen(input->path, "rb"))) {
409
+ perror(input->path);
410
+ return zsv_compare_status_error;
411
+ }
412
+ struct zsv_opts these_opts = *opts;
413
+ these_opts.stream = input->stream;
414
+ enum zsv_status stat = zsv_new_with_properties(&these_opts, custom_prop_handler, input->path, &input->parser);
415
+ if (stat != zsv_status_ok)
416
+ return zsv_compare_status_error;
417
+
418
+ if (data->next_row(input) != zsv_status_row)
419
+ return zsv_compare_status_error;
420
+
421
+ return zsv_compare_status_ok;
422
+ }
423
+
424
+ zsv_compare_handle zsv_compare_new(void) {
425
+ zsv_compare_handle z = calloc(1, sizeof(*z));
426
+ #if defined(ZSV_COMPARE_CMP_FUNC) && defined(ZSV_COMPARE_CMP_CTX)
427
+ zsv_compare_set_comparison(z, ZSV_COMPARE_CMP_FUNC, ZSV_COMPARE_CMP_CTX);
428
+ #else
429
+ zsv_compare_set_comparison(z, zsv_compare_cell, NULL);
430
+ #endif
431
+ z->output_colnames_next = &z->output_colnames;
432
+
433
+ z->next_row = zsv_compare_next_unsorted_row;
434
+ z->get_cell = zsv_compare_get_unsorted_cell;
435
+ z->get_column_name = zsv_compare_get_unsorted_cell;
436
+ z->get_column_count = zsv_compare_get_unsorted_colcount;
437
+ z->input_init = input_init_unsorted;
438
+ return z;
439
+ }
440
+
441
+ static void zsv_compare_set_sorted_callbacks(struct zsv_compare_data *data) {
442
+ data->next_row = zsv_compare_next_sorted_row;
443
+ data->get_cell = zsv_compare_get_sorted_cell;
444
+ data->get_column_name = zsv_compare_get_sorted_colname;
445
+ data->get_column_count = zsv_compare_get_sorted_colcount;
446
+ data->input_init = input_init_sorted;
447
+ }
448
+
449
+ static enum zsv_compare_status zsv_compare_init_sorted(struct zsv_compare_data *data) {
450
+ int rc;
451
+ // to do: use sql_internal.h interface
452
+ const char *db_url = data->sort_in_memory ? "file::memory:" : "";
453
+ if ((rc = sqlite3_open_v2(db_url, &data->sort_db, SQLITE_OPEN_URI | SQLITE_OPEN_READWRITE, NULL)) == SQLITE_OK &&
454
+ data->sort_db && (rc = sqlite3_create_module(data->sort_db, "csv", &CsvModule, 0) == SQLITE_OK)) {
455
+ zsv_compare_set_sorted_callbacks(data);
456
+ return zsv_compare_status_ok;
457
+ }
458
+ return zsv_compare_status_error;
459
+ }
460
+
461
+ static void zsv_compare_data_free(struct zsv_compare_data *data) {
462
+ if (data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) {
463
+ if (data->writer.handle.jsw)
464
+ jsonwriter_delete(data->writer.handle.jsw);
465
+ } else
466
+ zsv_writer_delete(data->writer.handle.csv);
467
+
468
+ for (unsigned i = 0; i < data->input_count; i++)
469
+ zsv_compare_input_free(&data->inputs[i]);
470
+ free(data->inputs);
471
+ free(data->combined_key_names);
472
+ free(data->inputs_to_sort);
473
+ for (unsigned i = 0; i < data->writer.properties.used; i++)
474
+ free(data->writer.properties.names[i]);
475
+ free(data->writer.properties.names);
476
+
477
+ if (data->sort) {
478
+ if (data->sort_db)
479
+ sqlite3_close(data->sort_db);
480
+ }
481
+
482
+ zsv_compare_added_column_delete(data->added_columns);
483
+
484
+ zsv_compare_unique_colnames_delete(&data->output_colnames);
485
+ zsv_compare_unique_colnames_delete(&data->added_colnames);
486
+
487
+ for (struct zsv_compare_key *next, *key = data->keys; key; key = next) {
488
+ next = key->next;
489
+ free(key);
490
+ }
491
+ }
492
+
493
+ void zsv_compare_delete(zsv_compare_handle z) {
494
+ if (z) {
495
+ zsv_compare_data_free(z);
496
+ free(z);
497
+ }
498
+ }
499
+
500
+ void zsv_compare_set_comparison(struct zsv_compare_data *data, zsv_compare_cell_func cmp, void *cmp_ctx) {
501
+ data->cmp = cmp;
502
+ data->cmp_ctx = cmp_ctx;
503
+ }
504
+
505
+ static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2, void *data, unsigned col_ix) {
506
+ (void)(ctx);
507
+ (void)(data);
508
+ (void)(col_ix);
509
+ return zsv_strincmp(c1.str, c1.len, c2.str, c2.len);
510
+ }
511
+
512
+ static enum zsv_compare_status zsv_compare_advance(struct zsv_compare_data *data) {
513
+ // advance each input (if not row_loaded) to their next row
514
+ char got = 0;
515
+ for (unsigned i = 0; i < data->input_count; i++) {
516
+ struct zsv_compare_input *input = &data->inputs[i];
517
+ if (input->done)
518
+ continue;
519
+
520
+ if (input->row_loaded) {
521
+ got = 1;
522
+ continue;
523
+ }
524
+ if (data->next_row(input) != zsv_status_row)
525
+ input->done = 1;
526
+ else {
527
+ for (unsigned idx = 0; idx < input->key_count; idx++)
528
+ input->keys[idx].value = data->get_cell(input, input->keys[idx].col_ix);
529
+ input->row_loaded = 1;
530
+ got = 1;
531
+ }
532
+ }
533
+ return got ? zsv_compare_status_ok : zsv_compare_status_no_more_input;
534
+ }
535
+
536
+ static int zsv_compare_inputp_cmp(const void *inputpx, const void *inputpy) {
537
+ struct zsv_compare_input *const *xp = inputpx;
538
+ struct zsv_compare_input *const *yp = inputpy;
539
+ const struct zsv_compare_input *x = *xp;
540
+ const struct zsv_compare_input *y = *yp;
541
+
542
+ if (!x->row_loaded && !y->row_loaded)
543
+ return 0;
544
+ if (!x->row_loaded)
545
+ return 1;
546
+ if (!y->row_loaded)
547
+ return -1;
548
+
549
+ int cmp = 0;
550
+ for (unsigned i = 0; !cmp && i < x->key_count && i < y->key_count; i++)
551
+ // for multibyte input, the input must be also sorted lexicographically
552
+ // to avoid potential mismatches
553
+ // see e.g. https://stackoverflow.com/questions/4611302/sorting-utf-8-strings
554
+ cmp = zsv_strincmp(x->keys[i].value.str, x->keys[i].value.len, y->keys[i].value.str, y->keys[i].value.len);
555
+ return cmp;
556
+ }
557
+
558
+ static enum zsv_compare_status zsv_compare_next(struct zsv_compare_data *data) {
559
+ data->status = zsv_compare_advance(data);
560
+ if (data->status != zsv_compare_status_ok)
561
+ return data->status;
562
+
563
+ data->row_count++;
564
+ // sort the inputs by ID value first, and input position second
565
+ // for as many inputs have the same smallest ID values, output them as a group
566
+ // and set input->row_loaded to 0
567
+ qsort(data->inputs_to_sort, data->input_count, sizeof(*data->inputs_to_sort), zsv_compare_inputp_cmp);
568
+
569
+ // find the next subset of inputs with identical id values and process those inputs
570
+ unsigned last = 0;
571
+ struct zsv_compare_input *min_input = data->inputs_to_sort[0];
572
+ for (unsigned tmp_i = 1; tmp_i < data->input_count; tmp_i++) {
573
+ struct zsv_compare_input *tmp = data->inputs_to_sort[tmp_i];
574
+ if (!tmp->row_loaded)
575
+ continue;
576
+ if (!zsv_compare_inputp_cmp(&min_input, &tmp)) { // keys are the same
577
+ last = tmp_i;
578
+ continue;
579
+ }
580
+
581
+ // keys are different
582
+ break;
583
+ }
584
+
585
+ // print row
586
+ zsv_compare_print_row(data, last);
587
+
588
+ // reset row_loaded
589
+ for (unsigned tmp = 0; tmp <= last; tmp++)
590
+ data->inputs_to_sort[tmp]->row_loaded = 0;
591
+
592
+ return zsv_compare_status_ok;
593
+ }
594
+
595
// Print the help text for the `compare` command to stdout, one line per entry. Always returns 0.
static int compare_usage(void) {
  static const char *usage[] = {
    "Usage: compare [options] <file.csv>...",
    "",
    "Options:",
    " -h,--help : show usage",
    " -k,--key <colname> : specify a column to match rows on",
    " can be specified multiple times",
    " -a,--add <colname> : specify an additional column to output",
    " will use the [first input] source",
    " --sort : sort on keys before comparing",
    " --sort-in-memory : for sorting, use in-memory instead of temporary db",
    " (see https://www.sqlite.org/inmemorydb.html)",
    " --tolerance <value>: ignore differences where both values are numeric",
    " strings with values differing by less than the given",
    " amount e.g. --tolerance 0.01 will ignore differences",
    " of numeric strings such as 123.45 vs 123.44",
    " --json : output as JSON",
    " --json-compact : output as compact JSON",
    " --json-object : output as an array of objects",
    " --print-key-colname: when outputting key column diffs,",
    " print column name instead of <key>",
    " -e,--exit-code : return < 0 on error, else the number of differences found",
    "",
    "NOTES",
    "",
    " If no keys are specified, each row from each input is compared to the",
    " row in the corresponding position in each other input (all the first rows",
    " from each input are compared to each other, all the second rows are compared to",
    " each other, etc).",
    "",
    " If one or more key is specified, each input is assumed to already be",
    " lexicographically sorted in ascending order; this is a necessary condition",
    " for the output to be correct (unless the --sort option is used). However, it",
    " is not required for each input to contain the same population of row keys",
    "",
    " The --sort option uses sqlite3 (unindexed) sort and is intended to be a",
    " convenience rather than performance feature. If you need high performance",
    " sorting, other solutions, such as a multi-threaded parallel sort, are likely",
    " superior. For handling quoted data, `2tsv` can be used to convert to a delimited",
    " format without quotes, that can be directly parsed with common UNIX utilities",
    " (such as `sort`), and `select --unescape` can be used to convert back",
    NULL,
  };

  // the table is NULL-terminated; walk it with a cursor
  const char *const *line = usage;
  while (*line != NULL) {
    puts(*line);
    line++;
  }

  return 0;
}
645
+
646
+ // TO DO: consolidate w sql.c, move common code to utils/db.c
647
+ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts,
648
+ struct zsv_prop_handler *custom_prop_handler) {
649
+ // See sql.c re passing options to sqlite3 when sorting is used
650
+ if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
651
+ compare_usage();
652
+ return argc < 2 ? 1 : 0;
653
+ }
654
+
655
+ // temporarily hold the input file names
656
+ const char **input_filenames = calloc(argc, sizeof(*input_filenames));
657
+ if (!input_filenames)
658
+ return zsv_compare_status_memory;
659
+
660
+ zsv_compare_handle data = zsv_compare_new();
661
+ if (!data) {
662
+ free(input_filenames);
663
+ return zsv_compare_status_memory;
664
+ }
665
+
666
+ int err = 0;
667
+ // initialization starts here. to do: make this a separate function
668
+ unsigned input_count = 0;
669
+ struct zsv_compare_key **next_key = &data->keys;
670
+ struct zsv_compare_added_column **added_column_next = &data->added_columns;
671
+ for (int arg_i = 1; data->status == zsv_compare_status_ok && !err && arg_i < argc; arg_i++) {
672
+ const char *arg = argv[arg_i];
673
+ #include <zsv/utils/arg.h>
674
+ if (!strcmp(arg, "-k") || !strcmp(arg, "--key")) {
675
+ const char *next_arg = zsv_next_arg(++arg_i, argc, argv, &err);
676
+ if (next_arg) {
677
+ next_key = zsv_compare_key_add(next_key, next_arg, &err);
678
+ data->key_count++;
679
+ }
680
+ } else if (!strcmp(arg, "-a") || !strcmp(arg, "--add")) {
681
+ const char *next_arg = zsv_next_arg(++arg_i, argc, argv, &err);
682
+ if (next_arg) {
683
+ zsv_compare_unique_colname *colname;
684
+ if ((data->status = zsv_compare_unique_colname_add(&data->added_colnames, (const unsigned char *)next_arg,
685
+ strlen(next_arg), &colname)) == zsv_compare_status_ok) {
686
+ // add to linked list for use after all data->output_colnames are allocated
687
+ added_column_next = zsv_compare_added_column_add(added_column_next, colname, &data->status);
688
+ if (data->status == zsv_compare_status_ok)
689
+ data->added_colcount++;
690
+ }
691
+ }
692
+ } else if (!strcmp(arg, "--tolerance")) {
693
+ const char *next_arg = zsv_next_arg(++arg_i, argc, argv, &err);
694
+ if (next_arg) {
695
+ if (zsv_strtod_exact(next_arg, &data->tolerance.value))
696
+ fprintf(stderr, "Invalid numeric value: %s\n", next_arg), err = 1;
697
+ else if (data->tolerance.value < 0)
698
+ fprintf(stderr, "Tolerance must be greater than zero (got %s)\n", next_arg), err = 1;
699
+ else
700
+ data->tolerance.value = nextafterf(data->tolerance.value, INFINITY);
701
+ }
702
+ } else if (!strcmp(arg, "--sort")) {
703
+ data->sort = 1;
704
+ } else if (!strcmp(arg, "--exit-code") || !strcmp(arg, "-e")) {
705
+ data->return_count = 1;
706
+ } else if (!strcmp(arg, "--json")) {
707
+ data->writer.type = ZSV_COMPARE_OUTPUT_TYPE_JSON;
708
+ } else if (!strcmp(arg, "--json-object")) {
709
+ data->writer.type = ZSV_COMPARE_OUTPUT_TYPE_JSON;
710
+ data->writer.object = 1;
711
+ } else if (!strcmp(arg, "--json-compact")) {
712
+ data->writer.type = ZSV_COMPARE_OUTPUT_TYPE_JSON;
713
+ data->writer.compact = 1;
714
+ } else if (!strcmp(arg, "--print-key-colname")) {
715
+ data->print_key_col_names = 1;
716
+ } else
717
+ input_filenames[input_count++] = arg;
718
+ }
719
+
720
+ struct zsv_opts original_default_opts;
721
+ struct zsv_prop_handler original_default_custom_prop_handler;
722
+ if (data->sort) {
723
+ if (!data->key_count) {
724
+ fprintf(stderr, "Error: --sort requires one or more keys\n");
725
+ data->status = zsv_compare_status_error;
726
+ } else {
727
+ original_default_opts = zsv_get_default_opts();
728
+ zsv_set_default_opts(*opts);
729
+
730
+ if (custom_prop_handler) {
731
+ original_default_custom_prop_handler = zsv_get_default_custom_prop_handler();
732
+ zsv_set_default_custom_prop_handler(*custom_prop_handler);
733
+ }
734
+
735
+ if (data->status == zsv_compare_status_ok)
736
+ data->status = zsv_compare_init_sorted(data);
737
+ }
738
+ }
739
+
740
+ if (err && data->status == zsv_compare_status_ok)
741
+ data->status = zsv_compare_status_error;
742
+ else if (!input_count)
743
+ data->status = zsv_compare_status_error;
744
+ else if (data->status == zsv_compare_status_ok) {
745
+ if ((data->status = zsv_compare_set_inputs(data, input_count)) == zsv_compare_status_ok) {
746
+ // initialize parsers
747
+ for (unsigned ix = 0; data->status == zsv_compare_status_ok && ix < input_count; ix++) {
748
+ struct zsv_compare_input *input = &data->inputs[ix];
749
+ input->path = input_filenames[ix];
750
+ data->status = data->input_init(data, input, opts, custom_prop_handler);
751
+ }
752
+ }
753
+
754
+ if (data->status == zsv_compare_status_ok) {
755
+ // find keys
756
+ for (unsigned i = 0; data->status == zsv_compare_status_ok && i < data->input_count; i++) {
757
+ struct zsv_compare_input *input = &data->inputs[i];
758
+ if ((input->col_count = data->get_column_count(input))) {
759
+ if (!(input->output_colnames = calloc(input->col_count, sizeof(*input->output_colnames)))) {
760
+ data->status = zsv_compare_status_memory;
761
+ break;
762
+ }
763
+ }
764
+
765
+ unsigned found_keys = 0;
766
+ for (unsigned j = 0; j < input->col_count && !input->done && data->status == zsv_compare_status_ok; j++) {
767
+ struct zsv_cell colname = data->get_column_name(input, j);
768
+ const unsigned char *colname_s = colname.str;
769
+ unsigned colname_len = colname.len;
770
+ zsv_compare_unique_colname *input_col;
771
+ data->status = zsv_compare_unique_colname_add(&input->colnames, colname_s, colname_len, &input_col);
772
+ if (data->status != zsv_compare_status_ok)
773
+ break;
774
+
775
+ if (input_col) {
776
+ // now that we know this colname+instance_num is unique to this input
777
+ // check if it is a key
778
+ for (unsigned key_ix = 0; found_keys < input->key_count && key_ix < input->key_count; key_ix++) {
779
+ struct zsv_compare_input_key *k = &input->keys[key_ix];
780
+ if (!k->found &&
781
+ !zsv_strincmp(colname_s, colname_len, (const unsigned char *)k->key->name, strlen(k->key->name))) {
782
+ k->found = 1;
783
+ found_keys++;
784
+ k->col_ix = j;
785
+ input_col->is_key = 1;
786
+ break;
787
+ }
788
+ }
789
+
790
+ // add it to the output
791
+ int added = 0;
792
+ zsv_compare_unique_colname *output_col = zsv_compare_unique_colname_add_if_not_found(
793
+ &data->output_colnames, colname_s, colname_len, input_col->instance_num, &added);
794
+ if (!output_col) // error
795
+ data->status = zsv_compare_status_error;
796
+ else {
797
+ if (added) {
798
+ if (*data->output_colnames_next)
799
+ (*data->output_colnames_next)->next = output_col;
800
+ if (!data->output_colnames_first)
801
+ data->output_colnames_first = output_col;
802
+
803
+ *data->output_colnames_next = output_col;
804
+ output_col->is_key = input_col->is_key;
805
+ data->output_colnames_next = &output_col->next;
806
+ output_col->output_ix = data->output_colcount++;
807
+ }
808
+ input->output_colnames[j] = output_col;
809
+ }
810
+ }
811
+ }
812
+
813
+ if (found_keys != data->key_count) {
814
+ fprintf(stderr, "Unable to find the following keys in %s: ", input->path);
815
+ for (unsigned int j = 0; j < input->key_count; j++) {
816
+ struct zsv_compare_input_key *k = &input->keys[j];
817
+ if (!k->found)
818
+ fprintf(stderr, "\n %s", k->key->name);
819
+ }
820
+ fprintf(stderr, "\n");
821
+ data->status = zsv_compare_status_error;
822
+ }
823
+ }
824
+ }
825
+
826
+ if (data->status == zsv_compare_status_ok) {
827
+ if (data->output_colcount == 0)
828
+ data->status = zsv_compare_status_no_data;
829
+ }
830
+
831
+ char started = 0;
832
+ if (data->status == zsv_compare_status_ok) {
833
+ started = 1;
834
+ zsv_compare_output_begin(data);
835
+
836
+ // match output colnames to added columns
837
+ for (struct zsv_compare_added_column *ac = data->added_columns; ac; ac = ac->next) {
838
+ zsv_compare_unique_colname col = {0};
839
+ col.name = ac->colname->name;
840
+ col.name_len = ac->colname->name_len;
841
+ col.instance_num = ac->colname->instance_num;
842
+ ac->output_colname = sglib_zsv_compare_unique_colname_find_member(data->output_colnames, &col);
843
+ if (!ac->output_colname)
844
+ fprintf(stderr, "Warning: added column %.*s not found in any input\n", (int)col.name_len, col.name);
845
+ }
846
+
847
+ // assign out2in mappings
848
+ for (unsigned i = 0; data->status == zsv_compare_status_ok && i < data->input_count; i++) {
849
+ struct zsv_compare_input *input = &data->inputs[i];
850
+ if (input->done)
851
+ continue;
852
+ if (!(input->out2in = calloc(data->output_colcount, sizeof(*input->out2in))))
853
+ data->status = zsv_compare_status_memory;
854
+ else {
855
+ for (unsigned j = 0; j < input->col_count; j++) {
856
+ zsv_compare_unique_colname *output_col = input->output_colnames[j];
857
+ if (output_col) {
858
+ input->out2in[output_col->output_ix] = j + 1;
859
+
860
+ // check if this should be the source of any additional columns
861
+ for (struct zsv_compare_added_column *ac = data->added_columns; ac; ac = ac->next) {
862
+ if (!ac->input && ac->output_colname) {
863
+ if (output_col == ac->output_colname) {
864
+ ac->input = input;
865
+ ac->col_ix = j;
866
+ }
867
+ }
868
+ }
869
+ }
870
+ }
871
+ }
872
+ }
873
+ }
874
+
875
+ // assertions
876
+ if (data->status == zsv_compare_status_ok) {
877
+ int ok = 0;
878
+ for (unsigned i = 0; i < data->input_count; i++)
879
+ if (!data->inputs[i].done)
880
+ ok++;
881
+
882
+ if (ok < 2) {
883
+ fprintf(stderr, "Compare requires at least two non-empty inputs\n");
884
+ data->status = zsv_compare_status_error;
885
+ }
886
+ }
887
+
888
+ // next, compare each row
889
+ while (data->status == zsv_compare_status_ok && zsv_compare_next(data) == zsv_compare_status_ok)
890
+ ;
891
+ if (started)
892
+ zsv_compare_output_end(data);
893
+ }
894
+
895
+ free(input_filenames);
896
+
897
+ err = data->status == zsv_compare_status_ok ? 0 : 1;
898
+
899
+ if (data->sort) {
900
+ zsv_set_default_opts(original_default_opts); // restore default options
901
+ if (custom_prop_handler)
902
+ zsv_set_default_custom_prop_handler(original_default_custom_prop_handler);
903
+ }
904
+
905
+ if (data->return_count) {
906
+ if (err)
907
+ err = -1;
908
+ else
909
+ err = data->diff_count;
910
+ }
911
+ zsv_compare_delete(data);
912
+ return err;
913
+ }