zsv 1.3.1 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/README.md +41 -3
  4. data/ext/zsv/extconf.rb +1 -1
  5. data/lib/zsv/version.rb +1 -1
  6. metadata +6 -226
  7. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +0 -756
  8. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +0 -381
  9. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +0 -228
  10. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +0 -123
  11. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +0 -39
  12. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +0 -104
  13. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +0 -41
  14. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +0 -1
  15. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +0 -14
  16. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +0 -19
  17. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +0 -116
  18. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +0 -194
  19. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +0 -796
  20. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +0 -41
  21. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +0 -16
  22. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +0 -280
  23. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +0 -36
  24. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +0 -913
  25. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +0 -23
  26. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +0 -20
  27. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +0 -140
  28. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +0 -91
  29. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +0 -81
  30. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +0 -82
  31. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +0 -404
  32. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +0 -569
  33. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +0 -365
  34. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +0 -366
  35. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +0 -341
  36. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +0 -263
  37. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +0 -298
  38. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +0 -157
  39. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +0 -177
  40. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +0 -444
  41. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +0 -145
  42. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +0 -110
  43. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +0 -15
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +0 -64
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +0 -1955
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +0 -6802
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +0 -230517
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +0 -12174
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +0 -2
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +0 -142
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +0 -49
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +0 -485
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +0 -1015
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +0 -663
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +0 -85
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +0 -75
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +0 -167
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +0 -228
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +0 -186
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +0 -23
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +0 -76
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +0 -167
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +0 -238
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +0 -186
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +0 -184
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +0 -52
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +0 -34
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +0 -103
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +0 -57
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +0 -69
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +0 -220
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +0 -34
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +0 -362
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +0 -764
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +0 -117
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +0 -508
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +0 -78
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +0 -505
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +0 -7
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +0 -59
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +0 -208
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +0 -795
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +0 -28
  84. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +0 -851
  85. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +0 -106
  86. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +0 -6
  87. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +0 -113
  88. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +0 -90
  89. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +0 -295
  90. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +0 -175
  91. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +0 -693
  92. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +0 -980
  93. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +0 -131
  94. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +0 -130
  95. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +0 -118
  96. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +0 -45
  97. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +0 -41
  98. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +0 -107
  99. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +0 -20
  100. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +0 -61
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +0 -14
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +0 -192
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +0 -72
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +0 -812
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +0 -753
  106. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +0 -372
  107. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +0 -15
  108. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +0 -119
  109. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +0 -45
  110. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +0 -63
  111. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +0 -12
  112. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +0 -166
  113. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +0 -214
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +0 -128
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +0 -43
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +0 -81
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +0 -25
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +0 -325
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +0 -73
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +0 -203
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +0 -7
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +0 -318
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +0 -134
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +0 -119
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +0 -322
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +0 -203
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +0 -36
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +0 -167
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +0 -36
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +0 -153
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +0 -32
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +0 -312
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +0 -29
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +0 -266
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +0 -9
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +0 -60
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +0 -1007
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +0 -453
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +0 -101
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +0 -49
  141. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +0 -393
  142. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +0 -322
  143. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +0 -228
  144. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +0 -91
  145. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +0 -240
  146. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +0 -63
  147. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +0 -57
  148. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +0 -148
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +0 -2
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +0 -427
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +0 -253
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +0 -121
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +0 -20
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +0 -159
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +0 -24
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +0 -180
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +0 -256
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +0 -197
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +0 -49
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +0 -400
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +0 -120
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +0 -18
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +0 -132
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +0 -178
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +0 -258
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +0 -246
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +0 -123
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +0 -153
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +0 -54
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +0 -267
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +0 -53
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +0 -357
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +0 -83
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +0 -33
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +0 -184
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +0 -292
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +0 -259
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +0 -13
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +0 -255
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +0 -96
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +0 -361
  182. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +0 -40
  183. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +0 -16
  184. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +0 -44
  185. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +0 -3
  186. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +0 -100
  187. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +0 -143
  188. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +0 -89
  189. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +0 -123
  190. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +0 -16
  191. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +0 -336
  192. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +0 -361
  193. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +0 -62
  194. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +0 -113
  195. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +0 -73
  196. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +0 -329
  197. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +0 -90
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +0 -49
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +0 -36
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +0 -58
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +0 -19
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +0 -147
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +0 -22
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +0 -28
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +0 -22
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +0 -17
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +0 -99
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +0 -65
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +0 -19
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +0 -19
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +0 -13
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +0 -54
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +0 -71
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +0 -53
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +0 -107
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +0 -18
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +0 -11
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +0 -148
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +0 -41
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +0 -25
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +0 -101
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +0 -33
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +0 -20
  224. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +0 -60
  225. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +0 -484
  226. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +0 -731
  227. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +0 -285
  228. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +0 -88
  229. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +0 -51
@@ -1,753 +0,0 @@
1
- /*
2
- * Copyright (C) 2025 Liquidaty and zsv contributors. All rights reserved.
3
- *
4
- * This file is part of zsv/lib, distributed under the MIT license as defined at
5
- * https://opensource.org/licenses/MIT
6
- */
7
-
8
- #include <stdio.h>
9
- #include <assert.h>
10
- #ifdef _WIN32
11
- #define _CRT_RAND_S /* for random number generator, used when sampling. must come before including stdlib.h */
12
- #else
13
- #include <sys/types.h> // off_t
14
- #endif
15
- #include <stdlib.h>
16
- #include <stdint.h>
17
- #include <string.h>
18
- #include <ctype.h>
19
- #include <time.h>
20
- #include <stdarg.h>
21
-
22
- // Added for pthreads and parallel I/O management
23
- #include <pthread.h>
24
- #include <string.h> // memcpy, free, etc.
25
-
26
- #define ZSV_COMMAND select
27
- #include "zsv_command.h"
28
-
29
- #include <zsv/utils/writer.h>
30
- #include <zsv/utils/utf8.h>
31
- #include <zsv/utils/string.h>
32
- #include <zsv/utils/mem.h>
33
- #include <zsv/utils/memmem.h>
34
- #include <zsv/utils/arg.h>
35
- #include <zsv/utils/os.h>
36
- #include <zsv/utils/file.h>
37
- #include "utils/chunk.h"
38
-
39
- #include "select/internal.h" // various defines and structs
40
- #include "select/usage.c" // zsv_select_usage()
41
- #include "select/rand.c" // demo_random_bw_1_and_100()
42
- #include "select/fixed.c" // auto_detect_fixed_column_sizes()
43
- #include "utils/cat.c"
44
-
45
- // zsv_select_add_search(), zsv_select_search_str_delete()
46
- #include "select/search.c"
47
-
48
- // struct zsv_select_regex, zsv_select_add_regex(), zsv_select_regexs_delete()
49
- #include "select/regex.c"
50
-
51
- // zsv_select_cell_clean(), zsv_select_row_search_hit()
52
- #include "select/processing.c"
53
-
54
- // zsv_select_add_exclusion(), zsv_select_get_header_name(),
55
- // zsv_select_check_exclusions_are_indexes()
56
- #include "select/selection.c"
57
-
58
- #ifndef ZSV_NO_PARALLEL
59
- #include "select/parallel.c" // zsv_parallel_data_new(), zsv_parallel_data_delete()
60
-
61
- #define ZSV_SELECT_PARALLEL_MIN_BYTES (1024 * 1024 * 2) // don't parallelize if < 2 MB of data (after header)
62
- #define ZSV_SELECT_PARALLEL_BUFFER_SZ (1024 * 1024 * 8) // to do: make customizable or dynamic
63
-
64
- static void zsv_select_data_row(void *ctx);
65
-
66
- static void zsv_select_data_row_parallel_done(void *ctx) {
67
- struct zsv_select_data *data = ctx;
68
- data->next_row_start = zsv_cum_scanned_length(data->parser) - zsv_row_length_raw_bytes(data->parser);
69
- zsv_abort(data->parser);
70
- data->cancelled = 1;
71
- }
72
- static void zsv_select_data_row_parallel(void *ctx) {
73
- struct zsv_select_data *data = ctx;
74
- zsv_select_data_row(ctx);
75
-
76
- if (UNLIKELY((off_t)zsv_cum_scanned_length(data->parser) >= data->end_offset_limit)) {
77
- // parse one more row to get accurate next-row start
78
- zsv_set_row_handler(data->parser, zsv_select_data_row_parallel_done);
79
- }
80
- }
81
-
82
- static void *zsv_select_process_chunk_internal(struct zsv_chunk_data *cdata) {
83
- if (cdata->start_offset >= cdata->end_offset) {
84
- cdata->skip = 1;
85
- return NULL;
86
- }
87
-
88
- struct zsv_select_data data = {0}; // local, non-shared zsv_select_data instance
89
-
90
- // Copy necessary setup data from the global context
91
- memcpy(&data, cdata->opts->ctx, sizeof(data));
92
- data.parallel_data = NULL; // clear parallel data pointer in local copy
93
- data.cancelled = 0; // necessary in case we are re-running due to incorrect chunk start
94
-
95
- #ifdef HAVE_PCRE2_8
96
- // duplicate data.search_regexs for thread safety
97
- if (data.search_regexs)
98
- data.search_regexs = zsv_select_regexs_dup(data.search_regexs);
99
- #endif
100
-
101
- struct zsv_opts opts = {0};
102
- opts.max_columns = cdata->opts->max_columns;
103
- opts.max_row_size = cdata->opts->max_row_size;
104
- opts.delimiter = cdata->opts->delimiter;
105
- opts.no_quotes = cdata->opts->no_quotes;
106
- opts.verbose = cdata->opts->verbose;
107
- opts.malformed_utf8_replace = cdata->opts->malformed_utf8_replace;
108
- opts.errprintf = cdata->opts->errprintf;
109
- opts.errf = cdata->opts->errf;
110
- opts.errclose = cdata->opts->errclose;
111
- opts.progress = cdata->opts->progress;
112
-
113
- // set up input
114
- FILE *stream = fopen(data.input_path, "rb");
115
- if (!stream) {
116
- cdata->status = zsv_status_error;
117
- return NULL;
118
- }
119
- fseeko(stream, cdata->start_offset, SEEK_SET);
120
-
121
- // set up output
122
- struct zsv_csv_writer_options writer_opts = {0};
123
-
124
- #ifdef __linux__
125
- cdata->tmp_output_filename = zsv_get_temp_filename("zsvselect");
126
- writer_opts.stream = fopen(cdata->tmp_output_filename, "wb");
127
- #else
128
- if (!(cdata->tmp_f = zsv_memfile_open(ZSV_SELECT_PARALLEL_BUFFER_SZ)) &&
129
- !(cdata->tmp_f = zsv_memfile_open(ZSV_SELECT_PARALLEL_BUFFER_SZ / 2)) &&
130
- !(cdata->tmp_f = zsv_memfile_open(ZSV_SELECT_PARALLEL_BUFFER_SZ / 4)) &&
131
- !(cdata->tmp_f = zsv_memfile_open(ZSV_SELECT_PARALLEL_BUFFER_SZ / 8)))
132
- cdata->tmp_f = zsv_memfile_open(0);
133
- writer_opts.stream = cdata->tmp_f;
134
- writer_opts.write = (size_t(*)(const void *restrict, size_t, size_t, void *restrict))zsv_memfile_write;
135
- #endif
136
-
137
- if (!writer_opts.stream) {
138
- cdata->status = zsv_status_memory;
139
- fclose(stream);
140
- return NULL;
141
- }
142
- data.csv_writer = zsv_writer_new(&writer_opts);
143
-
144
- // initialize parser
145
- opts.stream = stream;
146
- opts.row_handler = zsv_select_data_row_parallel;
147
- opts.ctx = &data;
148
- data.end_offset_limit = cdata->end_offset - cdata->start_offset; // set chunk boundary
149
- data.parser = zsv_new(&opts);
150
-
151
- // process
152
- enum zsv_status status = zsv_status_ok;
153
- while (status == zsv_status_ok && !zsv_signal_interrupted && !data.cancelled)
154
- status = zsv_parse_more(data.parser);
155
-
156
- #ifndef ZSV_NOPARALLEL
157
- if (!data.next_row_start)
158
- // unlikely, but maybe conceivable if chunk split was not accurate and
159
- // a correctly-split chunk's last row entirely ate the next incorrectly-split chunk
160
- data.next_row_start = zsv_cum_scanned_length(data.parser) + 1;
161
- #endif
162
-
163
- // clean up
164
- zsv_delete(data.parser);
165
- #ifdef HAVE_PCRE2_8
166
- zsv_select_regexs_delete(data.search_regexs);
167
- #endif
168
- fflush(stream);
169
- fclose(stream);
170
- zsv_writer_delete(data.csv_writer);
171
- #ifdef __linux__
172
- fclose(writer_opts.stream);
173
- #endif
174
- cdata->actual_next_row_start = data.next_row_start + cdata->start_offset;
175
- cdata->status = zsv_status_ok;
176
- return NULL;
177
- }
178
-
179
/*
 * pthread start routine: forwards the chunk descriptor passed as `arg` to
 * zsv_select_process_chunk_internal() and returns its result.
 */
static void *zsv_select_process_chunk(void *arg) {
  return zsv_select_process_chunk_internal((struct zsv_chunk_data *)arg);
}
183
- #endif // ZSV_NO_PARALLEL
184
-
185
- // zsv_select_output_data_row(): output row data (No change needed)
186
- static void zsv_select_output_data_row(struct zsv_select_data *data) {
187
- unsigned int cnt = data->output_cols_count;
188
- char first = 1;
189
- if (data->prepend_line_number) {
190
- zsv_writer_cell_zu(data->csv_writer, first, data->data_row_count);
191
- first = 0;
192
- }
193
-
194
- /* print data row */
195
- for (unsigned int i = 0; i < cnt; i++) { // for each output column
196
- unsigned int in_ix = data->out2in[i].ix;
197
- struct zsv_cell cell = zsv_get_cell(data->parser, in_ix);
198
- if (UNLIKELY(data->any_clean != 0)) {
199
- // leading/trailing white may have been converted to NULL for regex search
200
- while (cell.len && *cell.str == '\0')
201
- cell.str++, cell.len--;
202
- while (cell.len && cell.str[cell.len - 1] == '\0')
203
- cell.len--;
204
- cell.str = zsv_select_cell_clean(data, cell.str, &cell.quoted, &cell.len);
205
- }
206
- if (VERY_UNLIKELY(data->distinct == ZSV_SELECT_DISTINCT_MERGE)) {
207
- if (UNLIKELY(cell.len == 0)) {
208
- for (struct zsv_select_uint_list *ix = data->out2in[i].merge.indexes; ix; ix = ix->next) {
209
- unsigned int m_ix = ix->value;
210
- cell = zsv_get_cell(data->parser, m_ix);
211
- if (cell.len) {
212
- if (UNLIKELY(data->any_clean != 0))
213
- cell.str = zsv_select_cell_clean(data, cell.str, &cell.quoted, &cell.len);
214
- if (cell.len)
215
- break;
216
- }
217
- }
218
- }
219
- }
220
- zsv_writer_cell(data->csv_writer, first, cell.str, cell.len, cell.quoted);
221
- first = 0;
222
- }
223
- }
224
-
225
- static void zsv_select_data_row(void *ctx) {
226
- struct zsv_select_data *data = ctx;
227
- if (UNLIKELY(zsv_cell_count(data->parser) == 0 || data->cancelled))
228
- return;
229
-
230
- data->data_row_count++;
231
-
232
- // check if we should skip this row
233
- data->skip_this_row = 0;
234
- if (UNLIKELY(data->skip_data_rows)) {
235
- data->skip_data_rows--;
236
- data->skip_this_row = 1;
237
- } else if (UNLIKELY(data->sample_every_n || data->sample_pct)) {
238
- data->skip_this_row = 1;
239
- if (data->sample_every_n && data->data_row_count % data->sample_every_n == 1)
240
- data->skip_this_row = 0;
241
- if (data->sample_pct && demo_random_bw_1_and_100() <= data->sample_pct)
242
- data->skip_this_row = 0;
243
- }
244
-
245
- if (LIKELY(!data->skip_this_row)) {
246
- // if we have a search filter, check that
247
- char skip = 0;
248
- skip = !zsv_select_row_search_hit(data);
249
- if (!skip) {
250
-
251
- // print the data row
252
- zsv_select_output_data_row(data);
253
- if (UNLIKELY(data->data_rows_limit > 0))
254
- if (data->data_row_count + 1 >= data->data_rows_limit)
255
- data->cancelled = 1;
256
- }
257
- }
258
- if (data->data_row_count % 25000 == 0 && data->verbose)
259
- fprintf(stderr, "Processed %zu rows\n", data->data_row_count);
260
- }
261
-
262
- static void zsv_select_print_header_row(struct zsv_select_data *data) {
263
- if (data->no_header)
264
- return;
265
- zsv_writer_cell_prepend(data->csv_writer, (const unsigned char *)data->prepend_header);
266
- if (data->prepend_line_number)
267
- zsv_writer_cell_s(data->csv_writer, 1, (const unsigned char *)"#", 0);
268
- for (unsigned int i = 0; i < data->output_cols_count; i++) {
269
- unsigned char *header_name = zsv_select_get_header_name(data, data->out2in[i].ix);
270
- zsv_writer_cell_s(data->csv_writer, i == 0 && !data->prepend_line_number, header_name, 1);
271
- }
272
- zsv_writer_cell_prepend(data->csv_writer, NULL);
273
- }
274
-
275
- #ifndef ZSV_NO_PARALLEL
276
- static int zsv_setup_parallel_chunks(struct zsv_select_data *data, const char *path, size_t header_row_end) {
277
- if (data->num_chunks <= 1 || !path || !strcmp(path, "-")) {
278
- data->run_in_parallel = 0;
279
- return 0;
280
- }
281
-
282
- struct zsv_chunk_position *offsets =
283
- zsv_guess_file_chunks(path, data->num_chunks, ZSV_SELECT_PARALLEL_MIN_BYTES, header_row_end + 1
284
- #ifndef ZSV_NO_ONLY_CRLF
285
- ,
286
- data->opts->only_crlf_rowend
287
- #endif
288
- );
289
- if (!offsets)
290
- return -1; // fall back to serial
291
-
292
- if (!(data->parallel_data = zsv_parallel_data_new(data->num_chunks))) {
293
- zsv_free_chunks(offsets);
294
- fprintf(stderr, "Insufficient memory to parallelize!\n");
295
- return zsv_status_memory;
296
- }
297
-
298
- data->run_in_parallel = 1;
299
- data->parallel_data->main_data = data;
300
- data->end_offset_limit = offsets[0].end;
301
-
302
- for (unsigned int i = 0; i < data->num_chunks; i++) {
303
- data->parallel_data->chunk_data[i].start_offset = offsets[i].start;
304
- data->parallel_data->chunk_data[i].end_offset = offsets[i].end;
305
- if (data->opts->verbose)
306
- fprintf(stderr, "Chunk %i: %zu - %zu\n", i, (size_t)offsets[i].start, (size_t)offsets[i].end);
307
- }
308
- zsv_free_chunks(offsets);
309
- return 0;
310
- }
311
- #endif // ZSV_NO_PARALLEL
312
-
313
- static void zsv_select_header_finish(struct zsv_select_data *data) {
314
- if (zsv_select_set_output_columns(data)) {
315
- data->cancelled = 1;
316
- return;
317
- }
318
- #ifndef ZSV_NO_PARALLEL
319
- // set up parallelization; on error, fall back to serial
320
- // TO DO: option to exit on error (instead of fall back)
321
- if (data->input_path && data->num_chunks > 1) {
322
- size_t header_row_end = zsv_cum_scanned_length(data->parser);
323
- zsv_setup_parallel_chunks(data, data->input_path, header_row_end);
324
- }
325
- if (data->opts->verbose)
326
- fprintf(stderr, "Running %s\n", data->run_in_parallel ? "parallel" : "single-threaded");
327
-
328
- if (data->run_in_parallel) {
329
- struct zsv_parallel_data *pdata = data->parallel_data;
330
- zsv_select_print_header_row(data);
331
-
332
- // start worker threads
333
- for (unsigned int i = 1; i < data->num_chunks; i++) {
334
- struct zsv_chunk_data *cdata = &pdata->chunk_data[i];
335
- cdata->id = i;
336
- cdata->opts = data->opts;
337
-
338
- int create_status = pthread_create(&pdata->threads[i - 1], NULL, zsv_select_process_chunk, cdata);
339
- if (create_status != 0) {
340
- data->cancelled = 1;
341
- zsv_printerr(1, "Error creating worker thread for chunk %d: %s", i, strerror(create_status));
342
- return;
343
- }
344
- }
345
-
346
- // main thread processes chunk 1
347
- zsv_set_row_handler(data->parser, zsv_select_data_row_parallel);
348
- } else
349
- #endif
350
- {
351
- // no parallelization
352
- zsv_select_print_header_row(data);
353
- zsv_set_row_handler(data->parser, zsv_select_data_row);
354
- }
355
- }
356
-
357
- static void zsv_select_header_row(void *ctx) {
358
- struct zsv_select_data *data = ctx;
359
-
360
- if (data->cancelled)
361
- return;
362
-
363
- unsigned int cols = zsv_cell_count(data->parser);
364
- unsigned int max_header_ix = 0;
365
- for (unsigned int i = 0; i < cols; i++) {
366
- struct zsv_cell cell = zsv_get_cell(data->parser, i);
367
- if (UNLIKELY(data->any_clean != 0))
368
- cell.str = zsv_select_cell_clean(data, cell.str, &cell.quoted, &cell.len);
369
- if (i < data->opts->max_columns) {
370
- data->header_names[i] = zsv_memdup(cell.str, cell.len);
371
- max_header_ix = i + 1;
372
- }
373
- }
374
-
375
- // in case we want to make this an option later
376
- char trim_trailing_columns = 1;
377
- if (!trim_trailing_columns)
378
- max_header_ix = cols;
379
-
380
- if (max_header_ix > data->header_name_count)
381
- data->header_name_count = max_header_ix;
382
-
383
- zsv_select_header_finish(data);
384
- }
385
-
386
- static void zsv_select_cleanup(struct zsv_select_data *data) {
387
- if (data->opts->stream && data->opts->stream != stdin)
388
- fclose(data->opts->stream);
389
-
390
- zsv_writer_delete(data->csv_writer);
391
- zsv_select_search_str_delete(data->search_strings);
392
- #ifdef HAVE_PCRE2_8
393
- zsv_select_regexs_delete(data->search_regexs);
394
- #endif
395
-
396
- if (data->distinct == ZSV_SELECT_DISTINCT_MERGE) {
397
- for (unsigned int i = 0; i < data->output_cols_count; i++) {
398
- for (struct zsv_select_uint_list *next, *ix = data->out2in[i].merge.indexes; ix; ix = next) {
399
- next = ix->next;
400
- free(ix);
401
- }
402
- }
403
- }
404
- free(data->out2in);
405
-
406
- for (unsigned int i = 0; i < data->header_name_count; i++)
407
- free(data->header_names[i]);
408
- free(data->header_names);
409
-
410
- free(data->fixed.offsets);
411
-
412
- #ifndef ZSV_NO_PARALLEL
413
- if (data->run_in_parallel)
414
- zsv_parallel_data_delete(data->parallel_data);
415
- #endif
416
- }
417
-
418
/*
 * Consume the value that must follow the current command-line option.
 *
 * Advances arg_i; if no argument remains, reports the error through
 * zsv_printerr(), stores its result in `stat` and jumps to
 * zsv_select_main_done. Otherwise assigns conv_func(argv[arg_i]) to tgt.
 * Relies on arg_i, argc, argv, stat and the zsv_select_main_done label being
 * in scope at the point of use.
 */
#define ARG_require_val(tgt, conv_func)                                                                                \
  do {                                                                                                                 \
    arg_i++;                                                                                                           \
    if (arg_i >= argc) {                                                                                               \
      stat = zsv_printerr(1, "%s option requires parameter", argv[arg_i - 1]);                                         \
      goto zsv_select_main_done;                                                                                       \
    }                                                                                                                  \
    tgt = conv_func(argv[arg_i]);                                                                                      \
  } while (0)
426
-
427
- #ifndef ZSV_NO_PARALLEL
428
- static int zsv_merge_worker_outputs(struct zsv_select_data *data, FILE *dest_stream) {
429
- if (!data->run_in_parallel || !data->parallel_data)
430
- return 0;
431
-
432
- fflush(dest_stream);
433
- #ifdef __linux__
434
- int out_fd = fileno(dest_stream);
435
- #endif
436
- int status = 0;
437
-
438
- for (unsigned int i = 0; i < data->num_chunks - 1; i++) {
439
- pthread_join(data->parallel_data->threads[i], NULL);
440
-
441
- struct zsv_chunk_data *next_chunk = &data->parallel_data->chunk_data[i + 1];
442
- off_t actual_next_row_start =
443
- i == 0 ? data->next_row_start : data->parallel_data->chunk_data[i].actual_next_row_start;
444
- off_t expected_next_row_start = next_chunk->start_offset;
445
- if (actual_next_row_start > expected_next_row_start) {
446
- if (data->opts->verbose) {
447
- fprintf(stderr, "Chunk overlap detected (Prev End: %zu, Next Start: %zu). Reprocessing chunk %d.\n",
448
- (size_t)actual_next_row_start, (size_t)expected_next_row_start, i + 1);
449
- }
450
-
451
- // clean up invalid results from the worker thread
452
- zsv_chunk_data_clear_output(next_chunk);
453
-
454
- // adjust the start offset to the actual next row start
455
- next_chunk->start_offset = actual_next_row_start;
456
-
457
- // reprocess synchronously on the main thread
458
- zsv_select_process_chunk_internal(next_chunk);
459
-
460
- if (next_chunk->status != zsv_status_ok) // reprocessing failed!
461
- status = zsv_status_error;
462
- }
463
- }
464
-
465
- // join all of the output files into a single output file
466
- for (unsigned int i = 1; i < data->num_chunks && status == 0; i++) {
467
- struct zsv_chunk_data *c = &data->parallel_data->chunk_data[i];
468
- if (c->skip)
469
- continue;
470
- #ifdef __linux__
471
- int in_fd = open(c->tmp_output_filename, O_RDONLY);
472
- if (in_fd < 0) {
473
- zsv_printerr(1, "Error opening chunk %s: %s", c->tmp_output_filename, strerror(errno));
474
- status = zsv_status_error;
475
- break;
476
- }
477
-
478
- struct stat st;
479
- if (fstat(in_fd, &st) == 0) {
480
- long copied = zsv_concatenate_copy(out_fd, in_fd, st.st_size);
481
- if (copied != st.st_size) {
482
- zsv_printerr(1, "Warning: Partial copy chunk %d (%lli/%lli)", i, copied, (long long)st.st_size);
483
- status = zsv_status_error;
484
- }
485
- } else {
486
- status = zsv_status_error;
487
- }
488
- close(in_fd);
489
- #else
490
- zsv_memfile_rewind(c->tmp_f);
491
- if (zsv_copy_filelike_ptr(
492
- c->tmp_f, (size_t(*)(void *restrict ptr, size_t size, size_t nitems, void *restrict stream))zsv_memfile_read,
493
- dest_stream,
494
- (size_t(*)(const void *restrict ptr, size_t size, size_t nitems, void *restrict stream))fwrite)) {
495
- perror("zsv temp mem file");
496
- status = zsv_status_error;
497
- }
498
- #endif
499
- }
500
- return status;
501
- }
502
- #endif
503
-
504
- int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts,
505
- struct zsv_prop_handler *custom_prop_handler) {
506
- if (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
507
- zsv_select_usage();
508
- return zsv_status_ok;
509
- }
510
-
511
- struct zsv_select_data data = {0};
512
- data.opts = opts;
513
- struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
514
- int col_index_arg_i = 0;
515
- unsigned char *preview_buff = NULL;
516
- size_t preview_buff_len = 0;
517
- enum zsv_status stat = zsv_status_ok;
518
-
519
- for (int arg_i = 1; stat == zsv_status_ok && arg_i < argc; arg_i++) {
520
- const char *arg = argv[arg_i];
521
- if (!strcmp(arg, "--")) {
522
- col_index_arg_i = arg_i + 1;
523
- break;
524
- }
525
-
526
- if (!strcmp(arg, "-b") || !strcmp(arg, "--with-bom"))
527
- writer_opts.with_bom = 1;
528
- else if (!strcmp(arg, "--fixed-auto-max-lines"))
529
- ARG_require_val(data.fixed.max_lines, atoi);
530
- else if (!strcmp(arg, "--fixed-auto"))
531
- data.fixed.autodetect = 1;
532
- else if (!strcmp(arg, "--fixed")) {
533
- if (++arg_i >= argc) {
534
- stat = zsv_printerr(1, "--fixed requires val");
535
- goto zsv_select_main_done;
536
- }
537
- data.fixed.count = 1;
538
- for (const char *s = argv[arg_i]; *s; s++)
539
- if (*s == ',')
540
- data.fixed.count++;
541
- free(data.fixed.offsets);
542
- data.fixed.offsets = calloc(data.fixed.count, sizeof(*data.fixed.offsets));
543
- if (!data.fixed.offsets) {
544
- stat = zsv_printerr(1, "Out of memory!");
545
- goto zsv_select_main_done;
546
- }
547
- size_t count = 0;
548
- char *dup = strdup(argv[arg_i]), *tok;
549
- for (tok = strtok(dup, ","); tok && count < data.fixed.count; tok = strtok(NULL, ",")) {
550
- if (sscanf(tok, "%zu", &data.fixed.offsets[count++]) != 1)
551
- stat = zsv_printerr(1, "Invalid offset: %s", tok);
552
- }
553
- free(dup);
554
- } else if (!strcmp(arg, "--distinct"))
555
- data.distinct = 1;
556
- else if (!strcmp(arg, "--merge"))
557
- data.distinct = ZSV_SELECT_DISTINCT_MERGE;
558
- else if (!strcmp(arg, "-o") || !strcmp(arg, "--output")) {
559
- if (writer_opts.stream && writer_opts.stream != stdout)
560
- stat = zsv_printerr(1, "Output specified twice");
561
- else {
562
- ARG_require_val(arg, (const char *));
563
- if (!(writer_opts.stream = fopen(arg, "wb")))
564
- stat = zsv_printerr(1, "Unable to open %s", arg);
565
- }
566
- } else if (!strcmp(arg, "-N") || !strcmp(arg, "--line-number"))
567
- data.prepend_line_number = 1;
568
- else if (!strcmp(arg, "-n"))
569
- data.use_header_indexes = 1;
570
- else if (!strcmp(arg, "-s") || !strcmp(arg, "--search")) {
571
- const char *v;
572
- ARG_require_val(v, (const char *));
573
- zsv_select_add_search(&data, v);
574
- }
575
- #ifdef HAVE_PCRE2_8
576
- else if (!strcmp(arg, "--regex-search")) {
577
- const char *v;
578
- ARG_require_val(v, (const char *));
579
- zsv_select_add_regex(&data, v);
580
- }
581
- #endif
582
- else if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose"))
583
- data.verbose = 1;
584
- else if (!strcmp(arg, "--unescape"))
585
- data.unescape = 1;
586
- else if (!strcmp(arg, "-w") || !strcmp(arg, "--whitespace-clean"))
587
- data.clean_white = 1;
588
- else if (!strcmp(arg, "--whitespace-clean-no-newline")) {
589
- data.clean_white = 1;
590
- data.whitespace_clean_flags = 1;
591
- } else if (!strcmp(arg, "-W") || !strcmp(arg, "--no-trim"))
592
- data.no_trim_whitespace = 1;
593
- else if (!strcmp(arg, "--sample-every"))
594
- ARG_require_val(data.sample_every_n, atoi);
595
- else if (!strcmp(arg, "--sample-pct"))
596
- ARG_require_val(data.sample_pct, atof);
597
- else if (!strcmp(arg, "--prepend-header")) {
598
- int err = 0;
599
- data.prepend_header = zsv_next_arg(++arg_i, argc, argv, &err);
600
- if (err)
601
- stat = zsv_status_error;
602
- } else if (!strcmp(arg, "--no-header"))
603
- data.no_header = 1;
604
- else if (!strcmp(arg, "-H") || !strcmp(arg, "--head")) {
605
- int val;
606
- ARG_require_val(val, atoi);
607
- data.data_rows_limit = val + 1;
608
- } else if (!strcmp(arg, "-D") || !strcmp(arg, "--skip-data"))
609
- ARG_require_val(data.skip_data_rows, atoi);
610
- #ifndef ZSV_NO_PARALLEL
611
- else if (!strcmp(arg, "-j") || !strcmp(arg, "--jobs"))
612
- ARG_require_val(data.num_chunks, atoi);
613
- else if (!strcmp(arg, "--parallel")) {
614
- data.num_chunks = zsv_get_number_of_cores();
615
- if (data.num_chunks < 2) {
616
- fprintf(stderr, "Warning: --parallel specified but only one core found; using -j 4 instead");
617
- data.num_chunks = 4;
618
- }
619
- }
620
- #endif
621
- else if (!strcmp(arg, "-e")) {
622
- const char *v;
623
- ARG_require_val(v, (const char *));
624
- data.embedded_lineend = *v;
625
- } else if (!strcmp(arg, "-x")) {
626
- const char *v;
627
- ARG_require_val(v, (const char *));
628
- zsv_select_add_exclusion(&data, v);
629
- } else if (*arg == '-')
630
- stat = zsv_printerr(1, "Unrecognized argument: %s", arg);
631
- else if (data.input_path)
632
- stat = zsv_printerr(1, "Input specified twice");
633
- else
634
- data.input_path = arg;
635
- }
636
-
637
- if (stat != zsv_status_ok)
638
- goto zsv_select_main_done;
639
-
640
- // configuration & setup
641
- if (!writer_opts.stream)
642
- writer_opts.stream = stdout;
643
- if (data.sample_pct)
644
- srand(time(0));
645
- if (data.use_header_indexes && (stat = zsv_select_check_exclusions_are_indexes(&data)))
646
- goto zsv_select_main_done;
647
-
648
- #ifndef ZSV_NO_PARALLEL
649
- if (data.num_chunks > 1) {
650
- enum zsv_chunk_status chstat = zsv_chunkable(data.input_path, data.opts);
651
- if (chstat != zsv_chunk_status_ok) {
652
- stat = zsv_printerr(1, "%s", zsv_chunk_status_str(chstat));
653
- goto zsv_select_main_done;
654
- }
655
- }
656
- #endif
657
-
658
- // input stream
659
- if (data.input_path) {
660
- if (!(data.opts->stream = fopen(data.input_path, "rb")))
661
- stat = zsv_printerr(1, "Cannot open %s", data.input_path);
662
- } else {
663
- #ifdef NO_STDIN
664
- stat = zsv_printerr(1, "Input file required");
665
- goto zsv_select_main_done;
666
- #else
667
- data.opts->stream = stdin;
668
- #endif
669
- }
670
-
671
- // auto-fixed column detection
672
- if (data.fixed.autodetect) { // fixed-auto flag
673
- if (data.fixed.count)
674
- stat = zsv_printerr(1, "--fixed-auto cannot be used with --fixed");
675
- else {
676
- size_t bsz = 1024 * 256;
677
- if (!(preview_buff = calloc(bsz, 1)))
678
- stat = zsv_status_memory;
679
- else
680
- stat =
681
- auto_detect_fixed_column_sizes(&data.fixed, data.opts, preview_buff, bsz, &preview_buff_len, opts->verbose);
682
- }
683
- }
684
- if (stat != zsv_status_ok)
685
- goto zsv_select_main_done;
686
-
687
- // parser initialization
688
- if (col_index_arg_i) {
689
- data.col_argv = &argv[col_index_arg_i];
690
- data.col_argc = argc - col_index_arg_i;
691
- }
692
-
693
- data.header_names = calloc(data.opts->max_columns, sizeof(*data.header_names));
694
- data.out2in = calloc(data.opts->max_columns, sizeof(*data.out2in));
695
- data.csv_writer = zsv_writer_new(&writer_opts);
696
-
697
- if (!data.header_names || !data.out2in || !data.csv_writer) {
698
- stat = zsv_status_memory;
699
- goto zsv_select_main_done;
700
- }
701
-
702
- // execution
703
- data.opts->row_handler = zsv_select_header_row;
704
- data.opts->ctx = &data;
705
-
706
- if (zsv_new_with_properties(data.opts, custom_prop_handler, data.input_path, &data.parser) == zsv_status_ok) {
707
- data.any_clean = !data.no_trim_whitespace || data.clean_white || data.embedded_lineend || data.unescape;
708
-
709
- // apply fixed offsets (whether from --fixed arg or --fixed-auto detection)
710
- if (data.fixed.count && zsv_set_fixed_offsets(data.parser, data.fixed.count, data.fixed.offsets) != zsv_status_ok)
711
- data.cancelled = 1;
712
-
713
- unsigned char writer_buff[512];
714
- zsv_writer_set_temp_buff(data.csv_writer, writer_buff, sizeof(writer_buff));
715
-
716
- zsv_handle_ctrl_c_signal();
717
-
718
- enum zsv_status p_stat = zsv_status_ok;
719
- if (preview_buff_len)
720
- p_stat = zsv_parse_bytes(data.parser, preview_buff, preview_buff_len);
721
-
722
- while (p_stat == zsv_status_ok && !zsv_signal_interrupted && !data.cancelled)
723
- p_stat = zsv_parse_more(data.parser);
724
-
725
- if (p_stat == zsv_status_no_more_input) {
726
- zsv_finish(data.parser);
727
- #ifndef ZSV_NO_PARALLEL
728
- // unlikely, but maybe conceivable if chunk split was not accurate and
729
- // a correctly-split chunk's last row entirely ate the next incorrectly-split chunk
730
- if (data.run_in_parallel && !data.next_row_start)
731
- data.next_row_start = zsv_cum_scanned_length(data.parser) + 1;
732
- #endif
733
- }
734
- zsv_delete(data.parser);
735
-
736
- #ifndef ZSV_NO_PARALLEL
737
- if (data.run_in_parallel) {
738
- // explicitly flush and delete main writer before merge which uses raw fd
739
- zsv_writer_delete(data.csv_writer);
740
- data.csv_writer = NULL;
741
- if (zsv_merge_worker_outputs(&data, writer_opts.stream) != 0)
742
- stat = zsv_status_error;
743
- }
744
- #endif
745
- }
746
-
747
- zsv_select_main_done:
748
- free(preview_buff);
749
- zsv_select_cleanup(&data);
750
- if (writer_opts.stream && writer_opts.stream != stdout)
751
- fclose(writer_opts.stream);
752
- return stat;
753
- }