zsv 1.3.1 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/README.md +41 -3
  4. data/ext/zsv/extconf.rb +1 -1
  5. data/lib/zsv/version.rb +1 -1
  6. metadata +6 -226
  7. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +0 -756
  8. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +0 -381
  9. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +0 -228
  10. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +0 -123
  11. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +0 -39
  12. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +0 -104
  13. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +0 -41
  14. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +0 -1
  15. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +0 -14
  16. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +0 -19
  17. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +0 -116
  18. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +0 -194
  19. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +0 -796
  20. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +0 -41
  21. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +0 -16
  22. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +0 -280
  23. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +0 -36
  24. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +0 -913
  25. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +0 -23
  26. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +0 -20
  27. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +0 -140
  28. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +0 -91
  29. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +0 -81
  30. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +0 -82
  31. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +0 -404
  32. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +0 -569
  33. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +0 -365
  34. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +0 -366
  35. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +0 -341
  36. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +0 -263
  37. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +0 -298
  38. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +0 -157
  39. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +0 -177
  40. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +0 -444
  41. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +0 -145
  42. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +0 -110
  43. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +0 -15
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +0 -64
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +0 -1955
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +0 -6802
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +0 -230517
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +0 -12174
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +0 -2
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +0 -142
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +0 -49
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +0 -485
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +0 -1015
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +0 -663
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +0 -85
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +0 -75
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +0 -167
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +0 -228
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +0 -186
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +0 -23
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +0 -76
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +0 -167
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +0 -238
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +0 -186
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +0 -184
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +0 -52
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +0 -34
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +0 -103
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +0 -57
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +0 -69
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +0 -220
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +0 -34
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +0 -362
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +0 -764
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +0 -117
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +0 -508
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +0 -78
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +0 -505
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +0 -7
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +0 -59
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +0 -208
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +0 -795
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +0 -28
  84. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +0 -851
  85. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +0 -106
  86. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +0 -6
  87. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +0 -113
  88. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +0 -90
  89. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +0 -295
  90. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +0 -175
  91. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +0 -693
  92. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +0 -980
  93. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +0 -131
  94. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +0 -130
  95. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +0 -118
  96. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +0 -45
  97. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +0 -41
  98. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +0 -107
  99. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +0 -20
  100. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +0 -61
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +0 -14
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +0 -192
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +0 -72
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +0 -812
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +0 -753
  106. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +0 -372
  107. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +0 -15
  108. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +0 -119
  109. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +0 -45
  110. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +0 -63
  111. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +0 -12
  112. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +0 -166
  113. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +0 -214
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +0 -128
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +0 -43
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +0 -81
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +0 -25
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +0 -325
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +0 -73
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +0 -203
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +0 -7
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +0 -318
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +0 -134
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +0 -119
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +0 -322
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +0 -203
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +0 -36
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +0 -167
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +0 -36
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +0 -153
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +0 -32
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +0 -312
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +0 -29
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +0 -266
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +0 -9
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +0 -60
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +0 -1007
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +0 -453
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +0 -101
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +0 -49
  141. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +0 -393
  142. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +0 -322
  143. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +0 -228
  144. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +0 -91
  145. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +0 -240
  146. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +0 -63
  147. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +0 -57
  148. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +0 -148
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +0 -2
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +0 -427
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +0 -253
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +0 -121
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +0 -20
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +0 -159
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +0 -24
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +0 -180
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +0 -256
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +0 -197
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +0 -49
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +0 -400
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +0 -120
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +0 -18
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +0 -132
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +0 -178
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +0 -258
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +0 -246
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +0 -123
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +0 -153
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +0 -54
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +0 -267
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +0 -53
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +0 -357
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +0 -83
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +0 -33
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +0 -184
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +0 -292
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +0 -259
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +0 -13
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +0 -255
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +0 -96
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +0 -361
  182. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +0 -40
  183. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +0 -16
  184. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +0 -44
  185. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +0 -3
  186. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +0 -100
  187. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +0 -143
  188. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +0 -89
  189. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +0 -123
  190. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +0 -16
  191. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +0 -336
  192. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +0 -361
  193. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +0 -62
  194. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +0 -113
  195. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +0 -73
  196. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +0 -329
  197. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +0 -90
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +0 -49
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +0 -36
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +0 -58
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +0 -19
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +0 -147
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +0 -22
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +0 -28
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +0 -22
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +0 -17
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +0 -99
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +0 -65
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +0 -19
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +0 -19
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +0 -13
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +0 -54
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +0 -71
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +0 -53
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +0 -107
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +0 -18
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +0 -11
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +0 -148
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +0 -41
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +0 -25
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +0 -101
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +0 -33
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +0 -20
  224. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +0 -60
  225. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +0 -484
  226. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +0 -731
  227. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +0 -285
  228. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +0 -88
  229. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +0 -51
@@ -1,851 +0,0 @@
1
- /*
2
- * Copyright (C) 2021 Liquidaty and zsv contributors. All rights reserved.
3
- *
4
- * This file is part of zsv/lib, distributed under the MIT license as defined at
5
- * https://opensource.org/licenses/MIT
6
- */
7
-
8
- #include <stdio.h>
9
- #include <stdlib.h>
10
- #include <string.h>
11
- #include <sglib.h>
12
-
13
- #include <unistd.h> // unlink
14
-
15
- #define ZSV_COMMAND flatten
16
- #include "zsv_command.h"
17
-
18
- #include <zsv/utils/writer.h>
19
- #include <zsv/utils/file.h>
20
- #include <zsv/utils/utf8.h>
21
- #include <zsv/utils/mem.h>
22
- #include <zsv/utils/string.h>
23
- #include <memfile.h>
24
- #include <jsonwriter.h>
25
-
26
- enum flatten_agg_method {
27
- flatten_agg_method_none = 1,
28
- flatten_agg_method_delim,
29
- flatten_agg_method_json
30
- };
31
-
32
- struct flatten_column_name_and_ix {
33
- unsigned char *name;
34
- size_t name_len;
35
- unsigned int ix_plus_1;
36
- unsigned char free_name : 1;
37
- unsigned char dummy : 7;
38
- };
39
-
40
- struct chars_list {
41
- struct chars_list *next;
42
- unsigned char *value;
43
- };
44
-
45
- static struct chars_list *chars_list_new(const unsigned char *utf8_value, size_t len) {
46
- struct chars_list *e = calloc(1, sizeof(*e));
47
- if (e)
48
- e->value = zsv_memdup(utf8_value, len);
49
- return e;
50
- }
51
-
52
- #ifndef FREEIF
53
- #define FREEIF(x) \
54
- if (x) \
55
- free(x), x = NULL
56
- #endif
57
-
58
- static void chars_lists_delete(struct chars_list **p) {
59
- if (p && *p) {
60
- struct chars_list *next;
61
- for (struct chars_list *e = *p; e; e = next) {
62
- next = e->next;
63
- FREEIF(e->value);
64
- free(e);
65
- }
66
- *p = NULL;
67
- }
68
- }
69
-
70
- struct flatten_agg_col {
71
- struct flatten_agg_col *next;
72
- struct flatten_column_name_and_ix column;
73
- struct chars_list *values, **last_value;
74
- enum flatten_agg_method agg_method;
75
- unsigned char *delimiter;
76
- };
77
-
78
- struct flatten_agg_col_iterator {
79
- unsigned char *str;
80
- size_t len;
81
-
82
- // internal use only
83
- struct chars_list *current_cl;
84
- };
85
-
86
- static void flatten_agg_col_iterator_init(struct flatten_agg_col *c, struct flatten_agg_col_iterator *i) {
87
- memset(i, 0, sizeof(*i));
88
- switch (c->agg_method) {
89
- case flatten_agg_method_json:
90
- case flatten_agg_method_delim:
91
- if ((i->current_cl = c->values))
92
- i->str = i->current_cl->value;
93
- break;
94
- default:
95
- break;
96
- }
97
- }
98
-
99
- static void flatten_agg_col_iterator_replace_str(struct flatten_agg_col_iterator *i, unsigned char **new_s) {
100
- if (i->current_cl)
101
- i->current_cl->value = *new_s;
102
- else {
103
- fprintf(stderr, "flatten_agg_col_iterator_replace_str() error: no current value to replace\n");
104
- free(*new_s);
105
- *new_s = NULL;
106
- }
107
- }
108
-
109
- static void flatten_agg_col_iterator_next(struct flatten_agg_col_iterator *i) {
110
- if (i->current_cl && (i->current_cl = i->current_cl->next))
111
- i->str = i->current_cl->value;
112
- }
113
-
114
- static char flatten_agg_col_iterator_done(struct flatten_agg_col_iterator *i) {
115
- return i->current_cl ? 0 : 1;
116
- }
117
-
118
- static const unsigned char *flatten_agg_col_delimiter(struct flatten_agg_col *c) {
119
- if (c->delimiter)
120
- return c->delimiter;
121
- switch (c->agg_method) {
122
- case flatten_agg_method_json:
123
- return NULL;
124
- case flatten_agg_method_none:
125
- case flatten_agg_method_delim:
126
- return (const unsigned char *)"|";
127
- }
128
- return (const unsigned char *)"|";
129
- }
130
-
131
- static void flatten_agg_col_add_value(struct flatten_agg_col *c, const unsigned char *utf8_value, size_t len) {
132
- if (!c->last_value)
133
- c->last_value = &c->values;
134
- struct chars_list *e = chars_list_new(utf8_value, len);
135
- if (e) {
136
- *c->last_value = e;
137
- c->last_value = &e->next;
138
- }
139
- }
140
-
141
- typedef struct flatten_output_column {
142
- struct flatten_output_column *next;
143
- unsigned char *name;
144
- size_t name_len;
145
- unsigned char *compare_name; // same as name, unless case-insensitive in which case, lower case
146
- unsigned char *current_value;
147
-
148
- struct flatten_output_column *left;
149
- struct flatten_output_column *right;
150
- unsigned char color : 1;
151
- unsigned char dummy : 7;
152
- } flatten_output_column;
153
-
154
- void flatten_output_column_free(struct flatten_output_column *e) {
155
- FREEIF(e->name);
156
- FREEIF(e->compare_name);
157
- FREEIF(e->current_value);
158
- }
159
-
160
- int flatten_output_column_compare(flatten_output_column *x, flatten_output_column *y) {
161
- return strcmp((char *)x->compare_name, (char *)y->compare_name);
162
- }
163
-
164
- SGLIB_DEFINE_RBTREE_PROTOTYPES(flatten_output_column, left, right, color, flatten_output_column_compare);
165
-
166
- SGLIB_DEFINE_RBTREE_FUNCTIONS(flatten_output_column, left, right, color, flatten_output_column_compare);
167
-
168
- struct flatten_data {
169
- unsigned int max_cols;
170
- unsigned int output_column_total_count;
171
-
172
- struct flatten_output_column *output_columns_by_value;
173
-
174
- // output_columns_by_value, linked list
175
- struct flatten_output_column *output_columns_by_value_head;
176
- struct flatten_output_column **output_columns_by_value_tail;
177
-
178
- unsigned int current_column_index;
179
- unsigned int row_count;
180
- unsigned int row_count2;
181
- unsigned int output_row;
182
-
183
- struct flatten_column_name_and_ix row_id_column;
184
- struct flatten_column_name_and_ix column_name_column;
185
- struct flatten_column_name_and_ix value_column;
186
-
187
- struct flatten_output_column *current_column_name_column;
188
- unsigned char *current_column_name_value;
189
-
190
- unsigned char *last_asset_id;
191
- size_t last_asset_id_len;
192
- unsigned char *current_asset_id; // will equal last_asset_id if they are the same
193
- size_t current_asset_id_len;
194
-
195
- const char *output_filename;
196
-
197
- FILE *in;
198
- FILE *out;
199
- const char *input_path;
200
-
201
- zsv_csv_writer csv_writer;
202
-
203
- struct flatten_agg_col *agg_output_cols;
204
- struct flatten_agg_col **agg_output_cols_vector;
205
- unsigned int agg_output_cols_vector_size;
206
-
207
- int max_rows_per_aggregation;
208
-
209
- // for json output: jsw and memfile
210
- jsonwriter_handle jsw;
211
- memfile_t memfile;
212
-
213
- enum flatten_agg_method all_aggregation_method;
214
-
215
- unsigned char cancelled : 1;
216
- unsigned char verbose : 1;
217
- unsigned char have_agg : 1;
218
- unsigned char dummy : 5;
219
- };
220
-
221
- static int flatten_output_column_add(struct flatten_data *data, const unsigned char *utf8_value, size_t len,
222
- unsigned char *compare_name) {
223
- if (data->output_column_total_count == data->max_cols) {
224
- free(compare_name);
225
- return zsv_printerr(1, "ERROR: Maximum number of columns (%i) exceeded", data->max_cols);
226
- }
227
-
228
- struct flatten_output_column *new_output_column = calloc(1, sizeof(*new_output_column));
229
- new_output_column->name = zsv_memdup(utf8_value, len);
230
- new_output_column->name_len = len;
231
- new_output_column->compare_name = compare_name;
232
-
233
- // add to rbtree
234
- sglib_flatten_output_column_add(&data->output_columns_by_value, new_output_column);
235
-
236
- // also add to linked list
237
- *data->output_columns_by_value_tail = new_output_column;
238
- data->output_columns_by_value_tail = &new_output_column->next;
239
- data->output_column_total_count++;
240
- return 0;
241
- }
242
-
243
- static flatten_output_column *flatten_output_column_find(struct flatten_data *data, const unsigned char *utf8_value,
244
- size_t len, unsigned char **compare_name) {
245
- flatten_output_column node, *found;
246
- node.compare_name = zsv_strtolowercase(utf8_value, &len);
247
- if (node.compare_name) {
248
- if ((found = sglib_flatten_output_column_find_member(data->output_columns_by_value, &node))) {
249
- free(node.compare_name);
250
- return found;
251
- }
252
- // not found
253
- if (compare_name)
254
- *compare_name = node.compare_name;
255
- else
256
- free(node.compare_name);
257
- }
258
- return NULL;
259
- }
260
-
261
- static void set_cnx(struct flatten_column_name_and_ix *cnx, const unsigned char *utf8_value, size_t len,
262
- unsigned int current_column_ix) {
263
- if (!cnx->ix_plus_1) {
264
- if (!cnx->name) { // none provided, assume its the next column
265
- if ((cnx->name = zsv_memdup(utf8_value, len))) {
266
- cnx->free_name = 1;
267
- cnx->name_len = len;
268
- }
269
- cnx->ix_plus_1 = current_column_ix + 1;
270
- } else if (!zsv_strincmp(cnx->name, len, utf8_value, len))
271
- cnx->ix_plus_1 = current_column_ix + 1;
272
- }
273
- }
274
-
275
- // flatten_cell1(): for any value in the "column name" column, add it to the list of columns
276
- static void flatten_cell1(void *hook, unsigned char *utf8_value, size_t len) {
277
- struct flatten_data *data = hook;
278
- if (!data->cancelled) {
279
- if (data->row_count == 0) {
280
- struct flatten_column_name_and_ix *cnxlist[] = {&data->row_id_column, &data->column_name_column,
281
- &data->value_column};
282
- for (unsigned int i = 0; i < 3; i++)
283
- if (cnxlist[i]->name || (!data->have_agg && i == data->current_column_index))
284
- set_cnx(cnxlist[i], utf8_value, len, data->current_column_index);
285
- } else if (data->current_column_index + 1 == data->column_name_column.ix_plus_1) {
286
- // we are in the "column name" column, so make sure we've added this to our columns to output
287
- unsigned char *compare_name = NULL;
288
- if (!flatten_output_column_find(data, utf8_value, len, &compare_name) && compare_name)
289
- data->cancelled = flatten_output_column_add(data, utf8_value, len, compare_name);
290
- }
291
- }
292
- data->current_column_index++;
293
- }
294
-
295
- static void flatten_row1(void *hook) {
296
- struct flatten_data *data = hook;
297
- if (data->cancelled)
298
- return;
299
- data->row_count++;
300
- data->current_column_index = 0;
301
- }
302
-
303
- static void flatten_cell2(void *hook, unsigned char *utf8_value, size_t len) {
304
- struct flatten_data *data = hook;
305
- if (!data->cancelled) {
306
- if (data->row_count2 == 0) {
307
- if (!data->row_id_column.ix_plus_1)
308
- if (data->row_id_column.name || !data->have_agg)
309
- set_cnx(&data->row_id_column, utf8_value, len, data->current_column_index);
310
-
311
- for (struct flatten_agg_col *c = data->agg_output_cols; c; c = c->next) {
312
- if (c->column.name_len == len && !zsv_strincmp(c->column.name, len, utf8_value, len))
313
- c->column.ix_plus_1 = data->current_column_index + 1;
314
- }
315
- } else {
316
- if (data->current_column_index < data->agg_output_cols_vector_size) {
317
- struct flatten_agg_col *c = data->agg_output_cols_vector[data->current_column_index];
318
- if (c)
319
- flatten_agg_col_add_value(c, utf8_value, len);
320
- }
321
-
322
- if (data->current_column_index + 1 == data->column_name_column.ix_plus_1) // column name
323
- data->current_column_name_column = flatten_output_column_find(data, utf8_value, len, NULL);
324
-
325
- else if (data->current_column_index + 1 == data->value_column.ix_plus_1) // value
326
- data->current_column_name_value = zsv_memdup(utf8_value, len);
327
-
328
- else if (data->current_column_index + 1 == data->row_id_column.ix_plus_1) { // asset ID
329
- if (!data->last_asset_id) { // no prior asset, so this is the first one
330
- data->last_asset_id = data->current_asset_id = zsv_memdup(utf8_value, len);
331
- data->last_asset_id_len = len;
332
- } else if (len != data->last_asset_id_len || memcmp(data->last_asset_id, utf8_value, len)) {
333
- // this is a different asset from the last one
334
- data->current_asset_id = zsv_memdup(utf8_value, len);
335
- data->current_asset_id_len = len;
336
- } else { // same as last asset
337
- data->current_asset_id = data->last_asset_id;
338
- data->current_asset_id_len = data->last_asset_id_len;
339
- }
340
- }
341
- }
342
- }
343
- data->current_column_index++;
344
- }
345
-
346
- static void flatten_output_header(struct flatten_data *data) {
347
- zsv_writer_cell(data->csv_writer, 1, data->row_id_column.name, data->row_id_column.name_len, 1);
348
- unsigned int i = 1;
349
- for (struct flatten_output_column *col = data->output_columns_by_value_head; col; col = col->next, i++) {
350
- zsv_writer_cell(data->csv_writer, 0, col->name, col->name_len, 1);
351
- }
352
-
353
- for (struct flatten_agg_col *c = data->agg_output_cols; c; c = c->next)
354
- zsv_writer_cell(data->csv_writer, !i++, c->column.name, c->column.name_len, 1);
355
- data->output_row = 1;
356
- }
357
-
358
- static unsigned char *flatten_replace_delim(unsigned char *inout, const unsigned char *delimiter, char replacement) {
359
- if (!inout)
360
- return NULL;
361
-
362
- if (!strstr((char *)inout, (char *)delimiter))
363
- return inout;
364
-
365
- unsigned int delim_len = strlen((char *)delimiter);
366
- unsigned int j = strlen((char *)inout);
367
- unsigned char *new_s = malloc(j + 1);
368
- int new_s_len = 0;
369
- char clen;
370
- for (unsigned int i = 0; i < j; i += clen) {
371
- clen = ZSV_UTF8_CHARLEN_NOERR((int)inout[i]);
372
- if (i + clen <= j && strncmp((char *)inout + i, (char *)delimiter, delim_len))
373
- for (int k = 0; k < clen; k++)
374
- new_s[new_s_len++] = inout[i + k];
375
- else
376
- new_s[new_s_len++] = replacement;
377
- }
378
- if (new_s)
379
- new_s[new_s_len++] = 0;
380
- free(inout);
381
- return new_s;
382
- }
383
-
384
- static void output_current_row(struct flatten_data *data) {
385
- if (data->last_asset_id) {
386
- data->output_row++;
387
- zsv_writer_cell(data->csv_writer, 1, data->last_asset_id, data->last_asset_id_len, 1);
388
- for (struct flatten_output_column *col = data->output_columns_by_value_head; col; col = col->next) {
389
- zsv_writer_cell(data->csv_writer, 0, col->current_value,
390
- col->current_value ? strlen((char *)col->current_value) : 0, 1);
391
- }
392
-
393
- for (struct flatten_agg_col *c = data->agg_output_cols; c; c = c->next) {
394
- unsigned char *value_to_print = NULL;
395
- size_t length_to_print = 0;
396
- struct flatten_agg_col_iterator it;
397
- if (c->agg_method == flatten_agg_method_json) {
398
- memfile_reset(data->memfile);
399
- jsonwriter_start_array(data->jsw);
400
- for (flatten_agg_col_iterator_init(c, &it); !flatten_agg_col_iterator_done(&it);
401
- flatten_agg_col_iterator_next(&it)) {
402
- // jsonwriter_str(data->jsw, it.str);
403
- if (!it.str || !*it.str)
404
- jsonwriter_null(data->jsw);
405
- else
406
- jsonwriter_unknown(data->jsw, it.str, strlen((const char *)it.str), 0);
407
- }
408
- jsonwriter_end_array(data->jsw);
409
- jsonwriter_flush(data->jsw);
410
- value_to_print = memfile_data(data->memfile);
411
- length_to_print = (size_t)memfile_tell(data->memfile);
412
- } else {
413
- const unsigned char *delimiter = flatten_agg_col_delimiter(c);
414
- if (!delimiter)
415
- delimiter = (const unsigned char *)"";
416
- size_t delimiter_len = strlen((const char *)delimiter);
417
- const char replacement = (*delimiter == '_' ? '.' : '_');
418
-
419
- // first, calc the length of joined string that we will need to create
420
- size_t joined_len = 0;
421
-
422
- int i = 0;
423
- for (flatten_agg_col_iterator_init(c, &it); !flatten_agg_col_iterator_done(&it);
424
- flatten_agg_col_iterator_next(&it), i++) {
425
- if (i)
426
- joined_len += delimiter_len;
427
- it.str = flatten_replace_delim(it.str, delimiter, replacement);
428
- flatten_agg_col_iterator_replace_str(&it, &it.str);
429
- if (it.str && *it.str)
430
- joined_len += strlen((char *)it.str);
431
- }
432
-
433
- if (joined_len && (value_to_print = malloc(joined_len))) {
434
- unsigned char *cursor = value_to_print;
435
- length_to_print = joined_len;
436
-
437
- i = 0;
438
- for (flatten_agg_col_iterator_init(c, &it); !flatten_agg_col_iterator_done(&it);
439
- flatten_agg_col_iterator_next(&it), i++) {
440
- // append delimiter
441
- if (i) {
442
- memcpy(cursor, delimiter, delimiter_len);
443
- cursor += delimiter_len;
444
- }
445
-
446
- // append value
447
- if (it.str && *it.str) {
448
- size_t len = strlen((char *)it.str);
449
- memcpy(cursor, it.str, len);
450
- cursor += len;
451
- }
452
- }
453
- }
454
- }
455
- zsv_writer_cell(data->csv_writer, 0, value_to_print, length_to_print, 1);
456
- if (c->agg_method != flatten_agg_method_json)
457
- free(value_to_print);
458
- chars_lists_delete(&c->values);
459
- c->last_value = NULL;
460
- }
461
- }
462
-
463
- for (struct flatten_output_column *col = data->output_columns_by_value_head; col; col = col->next)
464
- FREEIF(col->current_value);
465
- FREEIF(data->last_asset_id);
466
- }
467
-
468
- static void flatten_row2(void *hook) {
469
- struct flatten_data *data = hook;
470
- if (data->row_count2 == 0) {
471
- if (!data->row_id_column.ix_plus_1)
472
- fprintf(stderr, "No ID column found\n");
473
- if (data->current_column_index) {
474
- // set up the agg column vector
475
- data->agg_output_cols_vector_size = data->current_column_index;
476
- data->agg_output_cols_vector = calloc(data->agg_output_cols_vector_size, sizeof(*data->agg_output_cols_vector));
477
- for (struct flatten_agg_col *c = data->agg_output_cols; c; c = c->next) {
478
- if (c->column.ix_plus_1)
479
- data->agg_output_cols_vector[c->column.ix_plus_1 - 1] = c;
480
- }
481
- }
482
- } else {
483
- if (!data->current_asset_id && !data->last_asset_id)
484
- fprintf(stderr, "Warning: disregarding row %i: no asset id\n", data->row_count2);
485
- else {
486
- if (data->last_asset_id && data->last_asset_id != data->current_asset_id) {
487
- output_current_row(data);
488
- data->last_asset_id = data->current_asset_id;
489
- data->last_asset_id_len = data->current_asset_id_len;
490
- }
491
- if (data->current_column_name_column && data->current_column_name_value) {
492
- if (data->current_column_name_column->current_value) {
493
- fprintf(stderr, "Warning: multiple values for column %s, id %s: %s and %s\n",
494
- data->current_column_name_column->name, data->last_asset_id,
495
- data->current_column_name_column->current_value, data->current_column_name_value);
496
- FREEIF(data->current_column_name_column->current_value);
497
- }
498
- data->current_column_name_column->current_value = data->current_column_name_value;
499
- data->current_column_name_value = NULL;
500
- }
501
- }
502
- data->current_column_name_column = NULL;
503
- FREEIF(data->current_column_name_value);
504
- }
505
- data->current_column_index = 0;
506
- data->row_count2++;
507
- }
508
-
509
- const char *flatten_usage_msg[] = {
510
- APPNAME ": flatten a table",
511
- " based on a single-column key, assuming that rows to flatten always",
512
- " appear in contiguous lines",
513
- "",
514
- "Usage: " APPNAME " [<filename>] [<options>] -- [aggregate_output_spec ...]",
515
- "",
516
- "Each aggregate output specification consists of the column name or index, followed",
517
- // "either (i) a single-column aggregation or (future: (ii) the \"*\" placeholder (in conjunction with -a)).",
518
- "by the equal sign (=) and then an aggregation method, except that",
519
- "no equal sign suffix is needed if the --default-agg option is specified.",
520
- "If a column name contains an equal sign, it must be escaped with a preceding backslash.",
521
- "",
522
- "Aggregation methods:",
523
- // " max",
524
- // " min",
525
- " json (json array)",
526
- " delim (pipe-delimited)",
527
- // " arrayjs (json)",
528
- " delim_<delim> (user-specified delimiter)",
529
- // " unique (pipe-delimited)",
530
- // " uniquejs (json)",
531
- // " unique_<delim> (user-specified delimiter)",
532
- "",
533
- "Options:",
534
- " -b : output with BOM",
535
- " -v,--verbose : display verbose messages",
536
- " -C <max_columns_to_output> : maximum number of columns to output",
537
- " -m <max_rows_per_aggregation> : maximum number of rows (default: 1024)",
538
- " --row-id <column_name> : column name to group by",
539
- " --col-name <column_name> : column name specifying the output column name",
540
- " -V <column_name> : column name specifying the output value",
541
- // " --default-agg <method> : default aggregation method to use, if none specified",
542
- " -o <filename> : filename to save output to",
543
- NULL,
544
- };
545
-
546
- /*
547
- EXAMPLE
548
- echo 'row,col,val
549
- > A,ltv,100
550
- > A,loanid,A
551
- > A,hi,there
552
- > B,loanid,B
553
- > B,ltv,90
554
- > B,hi,you
555
- > B,xxx,zzz' | zsv flatten --row-id row --col-name col -V val
556
- row,ltv,loanid,hi,xxx
557
- A,100,A,there,
558
- B,90,B,you,zzz
559
- */
560
-
561
- static void flatten_usage(void) {
562
- for (size_t i = 0; flatten_usage_msg[i]; i++)
563
- fprintf(stdout, "%s\n", flatten_usage_msg[i]);
564
- }
565
-
566
- void flatten_agg_cols_delete(struct flatten_agg_col **p) {
567
- if (p && *p) {
568
- struct flatten_agg_col *next;
569
- for (struct flatten_agg_col *e = *p; e; e = next) {
570
- next = e->next;
571
- FREEIF(e->column.name);
572
- chars_lists_delete(&e->values);
573
- free(e);
574
- }
575
- *p = NULL;
576
- }
577
- }
578
-
579
- static struct flatten_agg_col *flatten_agg_col_new(const char *arg, int *err) {
580
- struct flatten_agg_col *e = calloc(1, sizeof(*e));
581
- if ((e->column.name = (unsigned char *)strdup(arg))) {
582
- e->column.name_len = strlen(arg);
583
- }
584
-
585
- unsigned char *write = e->column.name;
586
- unsigned char *write_end = e->column.name + e->column.name_len;
587
- unsigned char *read = e->column.name;
588
-
589
- unsigned char *agg_method_s = NULL;
590
-
591
- while (read && *read) {
592
- if (*read == '=') { // end of name!
593
- *read = '\0';
594
- agg_method_s = read + 1;
595
- e->column.name_len = read - e->column.name;
596
- break;
597
- } else if (*read == '\\') {
598
- read++;
599
- if (!*read)
600
- break;
601
- }
602
-
603
- *write = *read;
604
- write++;
605
- read++;
606
- }
607
-
608
- if (agg_method_s) {
609
- // for backward-compatibility, "array" or "array_" are treated the same as "delim" or "delim_"
610
- if (!strcmp((const char *)agg_method_s, "array") || !strcmp((const char *)agg_method_s, "delim"))
611
- e->agg_method = flatten_agg_method_delim;
612
- else if (!strcmp((const char *)agg_method_s, "json"))
613
- e->agg_method = flatten_agg_method_json;
614
- else if ((!strncmp((const char *)agg_method_s, "array_", strlen("array_")) &&
615
- strlen((const char *)agg_method_s) > strlen("array_"))) {
616
- e->agg_method = flatten_agg_method_delim;
617
- e->delimiter = agg_method_s + strlen("array_");
618
- } else if ((!strncmp((const char *)agg_method_s, "delim_", strlen("delim_")) &&
619
- strlen((const char *)agg_method_s) > strlen("delim_"))) {
620
- e->agg_method = flatten_agg_method_delim;
621
- e->delimiter = agg_method_s + strlen("delim_");
622
- } else
623
- *err =
624
- zsv_printerr(1, "Unrecognized aggregation method (expected json, delim or delim_<delim>): %s", agg_method_s);
625
- } else {
626
- *err = zsv_printerr(1, "No aggregation method specified for %s", arg);
627
- while (write < write_end) {
628
- *write = '\0';
629
- write++;
630
- }
631
- }
632
- if (!e->agg_method) {
633
- *err = 1;
634
- flatten_agg_cols_delete(&e);
635
- }
636
-
637
- return e;
638
- }
639
-
640
- static void flatten_cleanup(struct flatten_data *data) {
641
- flatten_agg_cols_delete(&data->agg_output_cols);
642
-
643
- if (data->in && data->in != stdin)
644
- fclose(data->in);
645
-
646
- struct flatten_column_name_and_ix *cnxlist[] = {&data->row_id_column, &data->column_name_column, &data->value_column};
647
- for (int i = 0; i < 3; i++) {
648
- struct flatten_column_name_and_ix *cnx = cnxlist[i];
649
- if (cnx->free_name)
650
- free(cnx->name);
651
- }
652
-
653
- for (struct flatten_output_column *next, *e = data->output_columns_by_value_head; e; e = next) {
654
- next = e->next;
655
- flatten_output_column_free(e);
656
- free(e);
657
- }
658
-
659
- FREEIF(data->agg_output_cols_vector);
660
- zsv_writer_delete(data->csv_writer);
661
- if (data->out && data->out != stdout)
662
- fclose(data->out);
663
-
664
- if (data->jsw)
665
- jsonwriter_delete(data->jsw);
666
- if (data->memfile)
667
- memfile_close(data->memfile);
668
- }
669
-
670
- int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *optsp,
671
- struct zsv_prop_handler *custom_prop_handler) {
672
- if (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
673
- flatten_usage();
674
- return 0;
675
- }
676
-
677
- struct zsv_opts opts = *optsp;
678
- struct flatten_data data = {0};
679
- struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
680
-
681
- data.output_columns_by_value_tail = &data.output_columns_by_value_head;
682
- data.max_rows_per_aggregation = 1024;
683
- data.max_cols = 1024;
684
-
685
- int err = 0;
686
- int agg_arg_i = 0;
687
-
688
- for (int arg_i = 1; arg_i < argc; arg_i++) {
689
- if (!strcmp(argv[arg_i], "--")) {
690
- agg_arg_i = arg_i + 1;
691
- break;
692
- } else if (!strcmp(argv[arg_i], "-b"))
693
- writer_opts.with_bom = 1;
694
- else if (!strcmp(argv[arg_i], "-C")) {
695
- if (!(arg_i + 1 < argc && atoi(argv[arg_i + 1]) > 9))
696
- err = zsv_printerr(1, "%s invalid: should be positive integer > 9 (got %s)", argv[arg_i], argv[arg_i + 1]);
697
- else
698
- data.max_cols = atoi(argv[++arg_i]);
699
- } else if (!strcmp(argv[arg_i], "-m")) {
700
- if (!(arg_i + 1 < argc && atoi(argv[arg_i + 1]) > 1))
701
- err = zsv_printerr(1, "%s invalid: should be positive integer > 1 (got %s)", argv[arg_i], argv[arg_i + 1]);
702
- else
703
- data.max_rows_per_aggregation = atoi(argv[++arg_i]);
704
- } else if (!strcmp(argv[arg_i], "--row-id")) { // used to be -i
705
- if (!(arg_i + 1 < argc && *argv[arg_i + 1]))
706
- err = zsv_printerr(1, "%s option: missing column name", argv[arg_i]);
707
- else {
708
- data.row_id_column.name = (unsigned char *)argv[++arg_i];
709
- data.row_id_column.name_len = strlen((char *)data.row_id_column.name);
710
- }
711
- } else if (!strcmp(argv[arg_i], "--col-name")) { // used to be -c
712
- if (!(arg_i + 1 < argc && *argv[arg_i + 1]))
713
- err = zsv_printerr(1, "%s option: missing column name", argv[arg_i]);
714
- else {
715
- data.column_name_column.name = (unsigned char *)argv[++arg_i];
716
- data.column_name_column.name_len = strlen((char *)data.column_name_column.name);
717
- }
718
- } else if (!strcmp(argv[arg_i], "-V")) {
719
- if (!(arg_i + 1 < argc))
720
- err = zsv_printerr(1, "-V option: missing column name");
721
- else {
722
- data.value_column.name = (unsigned char *)argv[++arg_i];
723
- data.value_column.name_len = strlen((char *)data.value_column.name);
724
- }
725
- } else if (!strcmp(argv[arg_i], "-o")) {
726
- if (!(arg_i + 1 < argc))
727
- err = zsv_printerr(1, "-o option: missing filename");
728
- else if (*argv[arg_i + 1] == '-')
729
- err = zsv_printerr(1, "-o option: filename may not start with '-' (got %s)", argv[arg_i + 1]);
730
- else
731
- data.output_filename = argv[++arg_i];
732
- } else if (data.in)
733
- err = zsv_printerr(1, "Input file was specified, cannot also read: %s", argv[arg_i]);
734
- else if (!(data.in = fopen(argv[arg_i], "rb")))
735
- err = zsv_printerr(1, "Could not open for reading: %s", argv[arg_i]);
736
- else
737
- data.input_path = argv[arg_i];
738
- }
739
-
740
- if (!data.in) {
741
- #ifdef NO_STDIN
742
- err = zsv_printerr(1, "Please specify an input file");
743
- #else
744
- data.in = stdin;
745
- #endif
746
- }
747
-
748
- if (err) {
749
- flatten_cleanup(&data);
750
- return 1;
751
- }
752
-
753
- if (agg_arg_i && agg_arg_i < argc) {
754
- struct flatten_agg_col **nextp = &data.agg_output_cols;
755
- for (int arg_i = 0; !err && arg_i + agg_arg_i < argc; arg_i++) {
756
- const char *arg = argv[arg_i + agg_arg_i];
757
- struct flatten_agg_col *cs = flatten_agg_col_new(arg, &err);
758
- if (cs) {
759
- data.have_agg = 1;
760
- *nextp = cs;
761
- nextp = &cs->next;
762
-
763
- if (cs->agg_method == flatten_agg_method_json) {
764
- if (!data.memfile) {
765
- data.memfile = memfile_open(1024);
766
- data.jsw = jsonwriter_new_stream(memfile_write, data.memfile);
767
- if (!data.memfile || !data.jsw) {
768
- fprintf(stderr, "Unable to allocate memfile and/or jsonwriter\n");
769
- flatten_cleanup(&data);
770
- return 1;
771
- }
772
- jsonwriter_set_option(data.jsw, jsonwriter_option_compact);
773
- }
774
- }
775
- }
776
- }
777
- }
778
-
779
- if (!(data.out = writer_opts.stream = data.output_filename ? fopen(data.output_filename, "wb") : stdout))
780
- err = zsv_printerr(1, "Unable to open %s for writing", data.output_filename);
781
-
782
- int passes = data.column_name_column.name || !data.have_agg ? 2 : 1;
783
- const char *input_path = NULL;
784
- FILE *in = NULL;
785
- char *tmp_fn = NULL;
786
- zsv_handle_ctrl_c_signal();
787
- if (passes == 1)
788
- in = data.in;
789
- else {
790
- tmp_fn = zsv_get_temp_filename("zsv_flatten_XXXXXXXX");
791
- if (tmp_fn) {
792
- FILE *tmp_f = fopen(tmp_fn, "w+b");
793
- opts.cell_handler = flatten_cell1;
794
- opts.row_handler = flatten_row1;
795
- opts.stream = data.in;
796
- input_path = data.input_path;
797
- opts.ctx = &data;
798
-
799
- zsv_parser handle;
800
- if (zsv_new_with_properties(&opts, custom_prop_handler, input_path, &handle) != zsv_status_ok)
801
- err = data.cancelled = zsv_printerr(1, "Unable to create csv parser");
802
- else {
803
- zsv_set_scan_filter(handle, zsv_filter_write, tmp_f);
804
- enum zsv_status status;
805
- while (!data.cancelled && !zsv_signal_interrupted && (status = zsv_parse_more(handle)) == zsv_status_ok)
806
- ;
807
- zsv_finish(handle);
808
- zsv_delete(handle);
809
- fflush(tmp_f);
810
- rewind(tmp_f);
811
- }
812
- in = tmp_f;
813
- }
814
- }
815
-
816
- if (!err) {
817
- struct zsv_opts opts2 = {0};
818
- opts2.cell_handler = flatten_cell2;
819
- opts2.row_handler = flatten_row2;
820
- opts2.ctx = &data;
821
- data.current_column_index = 0;
822
-
823
- if (!(data.csv_writer = zsv_writer_new(&writer_opts)))
824
- err = data.cancelled = zsv_printerr(1, "Unable to create csv writer");
825
-
826
- flatten_output_header(&data);
827
-
828
- opts2.stream = in;
829
- zsv_parser parser = zsv_new(&opts2);
830
- if (!parser)
831
- err = data.cancelled = zsv_printerr(1, "Unable to create csv parser");
832
-
833
- enum zsv_status status;
834
- while (!data.cancelled && !zsv_signal_interrupted && (status = zsv_parse_more(parser)) == zsv_status_ok)
835
- ;
836
- zsv_finish(parser);
837
- zsv_delete(parser);
838
- output_current_row(&data);
839
- }
840
- flatten_cleanup(&data);
841
-
842
- if (in && in != stdin)
843
- fclose(in);
844
-
845
- if (tmp_fn) {
846
- unlink(tmp_fn);
847
- free(tmp_fn);
848
- }
849
-
850
- return err;
851
- }