zsv 1.3.0 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/README.md +41 -3
  4. data/ext/zsv/extconf.rb +66 -49
  5. data/ext/zsv/options.c +1 -2
  6. data/ext/zsv/parser.c +14 -0
  7. data/ext/zsv/zsv_ext.c +3 -0
  8. data/lib/zsv/version.rb +1 -1
  9. metadata +6 -226
  10. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +0 -756
  11. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +0 -381
  12. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +0 -228
  13. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +0 -123
  14. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +0 -39
  15. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +0 -104
  16. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +0 -41
  17. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +0 -1
  18. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +0 -14
  19. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +0 -19
  20. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +0 -116
  21. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +0 -194
  22. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +0 -796
  23. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +0 -41
  24. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +0 -16
  25. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +0 -280
  26. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +0 -36
  27. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +0 -913
  28. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +0 -23
  29. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +0 -20
  30. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +0 -140
  31. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +0 -91
  32. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +0 -81
  33. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +0 -82
  34. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +0 -404
  35. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +0 -569
  36. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +0 -365
  37. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +0 -366
  38. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +0 -341
  39. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +0 -263
  40. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +0 -298
  41. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +0 -157
  42. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +0 -177
  43. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +0 -444
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +0 -145
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +0 -110
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +0 -15
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +0 -64
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +0 -1955
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +0 -6802
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +0 -230517
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +0 -12174
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +0 -2
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +0 -142
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +0 -49
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +0 -485
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +0 -1015
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +0 -663
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +0 -85
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +0 -75
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +0 -167
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +0 -228
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +0 -186
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +0 -23
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +0 -76
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +0 -167
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +0 -238
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +0 -186
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +0 -184
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +0 -52
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +0 -34
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +0 -103
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +0 -57
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +0 -69
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +0 -220
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +0 -34
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +0 -362
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +0 -764
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +0 -117
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +0 -508
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +0 -78
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +0 -505
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +0 -7
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +0 -59
  84. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +0 -208
  85. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +0 -795
  86. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +0 -28
  87. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +0 -851
  88. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +0 -106
  89. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +0 -6
  90. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +0 -113
  91. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +0 -90
  92. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +0 -295
  93. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +0 -175
  94. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +0 -693
  95. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +0 -980
  96. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +0 -131
  97. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +0 -130
  98. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +0 -118
  99. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +0 -45
  100. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +0 -41
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +0 -107
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +0 -20
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +0 -61
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +0 -14
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +0 -192
  106. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +0 -72
  107. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +0 -812
  108. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +0 -753
  109. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +0 -372
  110. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +0 -15
  111. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +0 -119
  112. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +0 -45
  113. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +0 -63
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +0 -12
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +0 -166
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +0 -214
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +0 -128
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +0 -43
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +0 -81
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +0 -25
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +0 -325
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +0 -73
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +0 -203
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +0 -7
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +0 -318
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +0 -134
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +0 -119
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +0 -322
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +0 -203
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +0 -36
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +0 -167
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +0 -36
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +0 -153
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +0 -32
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +0 -312
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +0 -29
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +0 -266
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +0 -9
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +0 -60
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +0 -1007
  141. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +0 -453
  142. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +0 -101
  143. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +0 -49
  144. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +0 -393
  145. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +0 -322
  146. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +0 -228
  147. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +0 -91
  148. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +0 -240
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +0 -63
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +0 -57
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +0 -148
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +0 -2
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +0 -427
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +0 -253
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +0 -121
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +0 -20
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +0 -159
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +0 -24
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +0 -180
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +0 -256
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +0 -197
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +0 -49
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +0 -400
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +0 -120
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +0 -18
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +0 -132
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +0 -178
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +0 -258
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +0 -246
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +0 -123
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +0 -153
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +0 -54
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +0 -267
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +0 -53
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +0 -357
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +0 -83
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +0 -33
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +0 -184
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +0 -292
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +0 -259
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +0 -13
  182. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +0 -255
  183. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +0 -96
  184. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +0 -361
  185. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +0 -40
  186. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +0 -16
  187. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +0 -44
  188. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +0 -3
  189. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +0 -100
  190. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +0 -143
  191. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +0 -89
  192. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +0 -123
  193. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +0 -16
  194. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +0 -336
  195. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +0 -361
  196. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +0 -62
  197. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +0 -113
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +0 -73
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +0 -329
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +0 -90
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +0 -49
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +0 -36
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +0 -58
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +0 -19
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +0 -147
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +0 -22
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +0 -28
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +0 -22
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +0 -17
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +0 -99
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +0 -65
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +0 -19
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +0 -19
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +0 -13
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +0 -54
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +0 -71
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +0 -53
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +0 -107
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +0 -18
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +0 -11
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +0 -148
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +0 -41
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +0 -25
  224. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +0 -101
  225. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +0 -33
  226. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +0 -20
  227. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +0 -60
  228. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +0 -484
  229. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +0 -731
  230. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +0 -285
  231. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +0 -88
  232. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +0 -51
@@ -1,753 +0,0 @@
1
- /*
2
- * Copyright (C) 2025 Liquidaty and zsv contributors. All rights reserved.
3
- *
4
- * This file is part of zsv/lib, distributed under the MIT license as defined at
5
- * https://opensource.org/licenses/MIT
6
- */
7
-
8
- #include <stdio.h>
9
- #include <assert.h>
10
- #ifdef _WIN32
11
- #define _CRT_RAND_S /* for random number generator, used when sampling. must come before including stdlib.h */
12
- #else
13
- #include <sys/types.h> // off_t
14
- #endif
15
- #include <stdlib.h>
16
- #include <stdint.h>
17
- #include <string.h>
18
- #include <ctype.h>
19
- #include <time.h>
20
- #include <stdarg.h>
21
-
22
- // Added for pthreads and parallel I/O management
23
- #include <pthread.h>
24
- #include <string.h> // memcpy, free, etc.
25
-
26
- #define ZSV_COMMAND select
27
- #include "zsv_command.h"
28
-
29
- #include <zsv/utils/writer.h>
30
- #include <zsv/utils/utf8.h>
31
- #include <zsv/utils/string.h>
32
- #include <zsv/utils/mem.h>
33
- #include <zsv/utils/memmem.h>
34
- #include <zsv/utils/arg.h>
35
- #include <zsv/utils/os.h>
36
- #include <zsv/utils/file.h>
37
- #include "utils/chunk.h"
38
-
39
- #include "select/internal.h" // various defines and structs
40
- #include "select/usage.c" // zsv_select_usage()
41
- #include "select/rand.c" // demo_random_bw_1_and_100()
42
- #include "select/fixed.c" // auto_detect_fixed_column_sizes()
43
- #include "utils/cat.c"
44
-
45
- // zsv_select_add_search(), zsv_select_search_str_delete()
46
- #include "select/search.c"
47
-
48
- // struct zsv_select_regex, zsv_select_add_regex(), zsv_select_regexs_delete()
49
- #include "select/regex.c"
50
-
51
- // zsv_select_cell_clean(), zsv_select_row_search_hit()
52
- #include "select/processing.c"
53
-
54
- // zsv_select_add_exclusion(), zsv_select_get_header_name(),
55
- // zsv_select_check_exclusions_are_indexes()
56
- #include "select/selection.c"
57
-
58
- #ifndef ZSV_NO_PARALLEL
59
- #include "select/parallel.c" // zsv_parallel_data_new(), zsv_parallel_data_delete()
60
-
61
- #define ZSV_SELECT_PARALLEL_MIN_BYTES (1024 * 1024 * 2) // don't parallelize if < 2 MB of data (after header)
62
- #define ZSV_SELECT_PARALLEL_BUFFER_SZ (1024 * 1024 * 8) // to do: make customizable or dynamic
63
-
64
- static void zsv_select_data_row(void *ctx);
65
-
66
- static void zsv_select_data_row_parallel_done(void *ctx) {
67
- struct zsv_select_data *data = ctx;
68
- data->next_row_start = zsv_cum_scanned_length(data->parser) - zsv_row_length_raw_bytes(data->parser);
69
- zsv_abort(data->parser);
70
- data->cancelled = 1;
71
- }
72
- static void zsv_select_data_row_parallel(void *ctx) {
73
- struct zsv_select_data *data = ctx;
74
- zsv_select_data_row(ctx);
75
-
76
- if (UNLIKELY((off_t)zsv_cum_scanned_length(data->parser) >= data->end_offset_limit)) {
77
- // parse one more row to get accurate next-row start
78
- zsv_set_row_handler(data->parser, zsv_select_data_row_parallel_done);
79
- }
80
- }
81
-
82
- static void *zsv_select_process_chunk_internal(struct zsv_chunk_data *cdata) {
83
- if (cdata->start_offset >= cdata->end_offset) {
84
- cdata->skip = 1;
85
- return NULL;
86
- }
87
-
88
- struct zsv_select_data data = {0}; // local, non-shared zsv_select_data instance
89
-
90
- // Copy necessary setup data from the global context
91
- memcpy(&data, cdata->opts->ctx, sizeof(data));
92
- data.parallel_data = NULL; // clear parallel data pointer in local copy
93
- data.cancelled = 0; // necessary in case we are re-running due to incorrect chunk start
94
-
95
- #ifdef HAVE_PCRE2_8
96
- // duplicate data.search_regexs for thread safety
97
- if (data.search_regexs)
98
- data.search_regexs = zsv_select_regexs_dup(data.search_regexs);
99
- #endif
100
-
101
- struct zsv_opts opts = {0};
102
- opts.max_columns = cdata->opts->max_columns;
103
- opts.max_row_size = cdata->opts->max_row_size;
104
- opts.delimiter = cdata->opts->delimiter;
105
- opts.no_quotes = cdata->opts->no_quotes;
106
- opts.verbose = cdata->opts->verbose;
107
- opts.malformed_utf8_replace = cdata->opts->malformed_utf8_replace;
108
- opts.errprintf = cdata->opts->errprintf;
109
- opts.errf = cdata->opts->errf;
110
- opts.errclose = cdata->opts->errclose;
111
- opts.progress = cdata->opts->progress;
112
-
113
- // set up input
114
- FILE *stream = fopen(data.input_path, "rb");
115
- if (!stream) {
116
- cdata->status = zsv_status_error;
117
- return NULL;
118
- }
119
- fseeko(stream, cdata->start_offset, SEEK_SET);
120
-
121
- // set up output
122
- struct zsv_csv_writer_options writer_opts = {0};
123
-
124
- #ifdef __linux__
125
- cdata->tmp_output_filename = zsv_get_temp_filename("zsvselect");
126
- writer_opts.stream = fopen(cdata->tmp_output_filename, "wb");
127
- #else
128
- if (!(cdata->tmp_f = zsv_memfile_open(ZSV_SELECT_PARALLEL_BUFFER_SZ)) &&
129
- !(cdata->tmp_f = zsv_memfile_open(ZSV_SELECT_PARALLEL_BUFFER_SZ / 2)) &&
130
- !(cdata->tmp_f = zsv_memfile_open(ZSV_SELECT_PARALLEL_BUFFER_SZ / 4)) &&
131
- !(cdata->tmp_f = zsv_memfile_open(ZSV_SELECT_PARALLEL_BUFFER_SZ / 8)))
132
- cdata->tmp_f = zsv_memfile_open(0);
133
- writer_opts.stream = cdata->tmp_f;
134
- writer_opts.write = (size_t(*)(const void *restrict, size_t, size_t, void *restrict))zsv_memfile_write;
135
- #endif
136
-
137
- if (!writer_opts.stream) {
138
- cdata->status = zsv_status_memory;
139
- fclose(stream);
140
- return NULL;
141
- }
142
- data.csv_writer = zsv_writer_new(&writer_opts);
143
-
144
- // initialize parser
145
- opts.stream = stream;
146
- opts.row_handler = zsv_select_data_row_parallel;
147
- opts.ctx = &data;
148
- data.end_offset_limit = cdata->end_offset - cdata->start_offset; // set chunk boundary
149
- data.parser = zsv_new(&opts);
150
-
151
- // process
152
- enum zsv_status status = zsv_status_ok;
153
- while (status == zsv_status_ok && !zsv_signal_interrupted && !data.cancelled)
154
- status = zsv_parse_more(data.parser);
155
-
156
- #ifndef ZSV_NOPARALLEL
157
- if (!data.next_row_start)
158
- // unlikely, but maybe conceivable if chunk split was not accurate and
159
- // a correctly-split chunk's last row entirely ate the next incorrectly-split chunk
160
- data.next_row_start = zsv_cum_scanned_length(data.parser) + 1;
161
- #endif
162
-
163
- // clean up
164
- zsv_delete(data.parser);
165
- #ifdef HAVE_PCRE2_8
166
- zsv_select_regexs_delete(data.search_regexs);
167
- #endif
168
- fflush(stream);
169
- fclose(stream);
170
- zsv_writer_delete(data.csv_writer);
171
- #ifdef __linux__
172
- fclose(writer_opts.stream);
173
- #endif
174
- cdata->actual_next_row_start = data.next_row_start + cdata->start_offset;
175
- cdata->status = zsv_status_ok;
176
- return NULL;
177
- }
178
-
179
/*
 * pthread start routine for a worker: `arg` is the worker's
 * struct zsv_chunk_data. Forwards to zsv_select_process_chunk_internal()
 * and returns its result.
 */
static void *zsv_select_process_chunk(void *arg) {
  return zsv_select_process_chunk_internal((struct zsv_chunk_data *)arg);
}
183
- #endif // ZSV_NO_PARALLEL
184
-
185
- // zsv_select_output_data_row(): output row data (No change needed)
186
- static void zsv_select_output_data_row(struct zsv_select_data *data) {
187
- unsigned int cnt = data->output_cols_count;
188
- char first = 1;
189
- if (data->prepend_line_number) {
190
- zsv_writer_cell_zu(data->csv_writer, first, data->data_row_count);
191
- first = 0;
192
- }
193
-
194
- /* print data row */
195
- for (unsigned int i = 0; i < cnt; i++) { // for each output column
196
- unsigned int in_ix = data->out2in[i].ix;
197
- struct zsv_cell cell = zsv_get_cell(data->parser, in_ix);
198
- if (UNLIKELY(data->any_clean != 0)) {
199
- // leading/trailing white may have been converted to NULL for regex search
200
- while (cell.len && *cell.str == '\0')
201
- cell.str++, cell.len--;
202
- while (cell.len && cell.str[cell.len - 1] == '\0')
203
- cell.len--;
204
- cell.str = zsv_select_cell_clean(data, cell.str, &cell.quoted, &cell.len);
205
- }
206
- if (VERY_UNLIKELY(data->distinct == ZSV_SELECT_DISTINCT_MERGE)) {
207
- if (UNLIKELY(cell.len == 0)) {
208
- for (struct zsv_select_uint_list *ix = data->out2in[i].merge.indexes; ix; ix = ix->next) {
209
- unsigned int m_ix = ix->value;
210
- cell = zsv_get_cell(data->parser, m_ix);
211
- if (cell.len) {
212
- if (UNLIKELY(data->any_clean != 0))
213
- cell.str = zsv_select_cell_clean(data, cell.str, &cell.quoted, &cell.len);
214
- if (cell.len)
215
- break;
216
- }
217
- }
218
- }
219
- }
220
- zsv_writer_cell(data->csv_writer, first, cell.str, cell.len, cell.quoted);
221
- first = 0;
222
- }
223
- }
224
-
225
- static void zsv_select_data_row(void *ctx) {
226
- struct zsv_select_data *data = ctx;
227
- if (UNLIKELY(zsv_cell_count(data->parser) == 0 || data->cancelled))
228
- return;
229
-
230
- data->data_row_count++;
231
-
232
- // check if we should skip this row
233
- data->skip_this_row = 0;
234
- if (UNLIKELY(data->skip_data_rows)) {
235
- data->skip_data_rows--;
236
- data->skip_this_row = 1;
237
- } else if (UNLIKELY(data->sample_every_n || data->sample_pct)) {
238
- data->skip_this_row = 1;
239
- if (data->sample_every_n && data->data_row_count % data->sample_every_n == 1)
240
- data->skip_this_row = 0;
241
- if (data->sample_pct && demo_random_bw_1_and_100() <= data->sample_pct)
242
- data->skip_this_row = 0;
243
- }
244
-
245
- if (LIKELY(!data->skip_this_row)) {
246
- // if we have a search filter, check that
247
- char skip = 0;
248
- skip = !zsv_select_row_search_hit(data);
249
- if (!skip) {
250
-
251
- // print the data row
252
- zsv_select_output_data_row(data);
253
- if (UNLIKELY(data->data_rows_limit > 0))
254
- if (data->data_row_count + 1 >= data->data_rows_limit)
255
- data->cancelled = 1;
256
- }
257
- }
258
- if (data->data_row_count % 25000 == 0 && data->verbose)
259
- fprintf(stderr, "Processed %zu rows\n", data->data_row_count);
260
- }
261
-
262
- static void zsv_select_print_header_row(struct zsv_select_data *data) {
263
- if (data->no_header)
264
- return;
265
- zsv_writer_cell_prepend(data->csv_writer, (const unsigned char *)data->prepend_header);
266
- if (data->prepend_line_number)
267
- zsv_writer_cell_s(data->csv_writer, 1, (const unsigned char *)"#", 0);
268
- for (unsigned int i = 0; i < data->output_cols_count; i++) {
269
- unsigned char *header_name = zsv_select_get_header_name(data, data->out2in[i].ix);
270
- zsv_writer_cell_s(data->csv_writer, i == 0 && !data->prepend_line_number, header_name, 1);
271
- }
272
- zsv_writer_cell_prepend(data->csv_writer, NULL);
273
- }
274
-
275
- #ifndef ZSV_NO_PARALLEL
276
- static int zsv_setup_parallel_chunks(struct zsv_select_data *data, const char *path, size_t header_row_end) {
277
- if (data->num_chunks <= 1 || !path || !strcmp(path, "-")) {
278
- data->run_in_parallel = 0;
279
- return 0;
280
- }
281
-
282
- struct zsv_chunk_position *offsets =
283
- zsv_guess_file_chunks(path, data->num_chunks, ZSV_SELECT_PARALLEL_MIN_BYTES, header_row_end + 1
284
- #ifndef ZSV_NO_ONLY_CRLF
285
- ,
286
- data->opts->only_crlf_rowend
287
- #endif
288
- );
289
- if (!offsets)
290
- return -1; // fall back to serial
291
-
292
- if (!(data->parallel_data = zsv_parallel_data_new(data->num_chunks))) {
293
- zsv_free_chunks(offsets);
294
- fprintf(stderr, "Insufficient memory to parallelize!\n");
295
- return zsv_status_memory;
296
- }
297
-
298
- data->run_in_parallel = 1;
299
- data->parallel_data->main_data = data;
300
- data->end_offset_limit = offsets[0].end;
301
-
302
- for (unsigned int i = 0; i < data->num_chunks; i++) {
303
- data->parallel_data->chunk_data[i].start_offset = offsets[i].start;
304
- data->parallel_data->chunk_data[i].end_offset = offsets[i].end;
305
- if (data->opts->verbose)
306
- fprintf(stderr, "Chunk %i: %zu - %zu\n", i, (size_t)offsets[i].start, (size_t)offsets[i].end);
307
- }
308
- zsv_free_chunks(offsets);
309
- return 0;
310
- }
311
- #endif // ZSV_NO_PARALLEL
312
-
313
- static void zsv_select_header_finish(struct zsv_select_data *data) {
314
- if (zsv_select_set_output_columns(data)) {
315
- data->cancelled = 1;
316
- return;
317
- }
318
- #ifndef ZSV_NO_PARALLEL
319
- // set up parallelization; on error, fall back to serial
320
- // TO DO: option to exit on error (instead of fall back)
321
- if (data->input_path && data->num_chunks > 1) {
322
- size_t header_row_end = zsv_cum_scanned_length(data->parser);
323
- zsv_setup_parallel_chunks(data, data->input_path, header_row_end);
324
- }
325
- if (data->opts->verbose)
326
- fprintf(stderr, "Running %s\n", data->run_in_parallel ? "parallel" : "single-threaded");
327
-
328
- if (data->run_in_parallel) {
329
- struct zsv_parallel_data *pdata = data->parallel_data;
330
- zsv_select_print_header_row(data);
331
-
332
- // start worker threads
333
- for (unsigned int i = 1; i < data->num_chunks; i++) {
334
- struct zsv_chunk_data *cdata = &pdata->chunk_data[i];
335
- cdata->id = i;
336
- cdata->opts = data->opts;
337
-
338
- int create_status = pthread_create(&pdata->threads[i - 1], NULL, zsv_select_process_chunk, cdata);
339
- if (create_status != 0) {
340
- data->cancelled = 1;
341
- zsv_printerr(1, "Error creating worker thread for chunk %d: %s", i, strerror(create_status));
342
- return;
343
- }
344
- }
345
-
346
- // main thread processes chunk 1
347
- zsv_set_row_handler(data->parser, zsv_select_data_row_parallel);
348
- } else
349
- #endif
350
- {
351
- // no parallelization
352
- zsv_select_print_header_row(data);
353
- zsv_set_row_handler(data->parser, zsv_select_data_row);
354
- }
355
- }
356
-
357
- static void zsv_select_header_row(void *ctx) {
358
- struct zsv_select_data *data = ctx;
359
-
360
- if (data->cancelled)
361
- return;
362
-
363
- unsigned int cols = zsv_cell_count(data->parser);
364
- unsigned int max_header_ix = 0;
365
- for (unsigned int i = 0; i < cols; i++) {
366
- struct zsv_cell cell = zsv_get_cell(data->parser, i);
367
- if (UNLIKELY(data->any_clean != 0))
368
- cell.str = zsv_select_cell_clean(data, cell.str, &cell.quoted, &cell.len);
369
- if (i < data->opts->max_columns) {
370
- data->header_names[i] = zsv_memdup(cell.str, cell.len);
371
- max_header_ix = i + 1;
372
- }
373
- }
374
-
375
- // in case we want to make this an option later
376
- char trim_trailing_columns = 1;
377
- if (!trim_trailing_columns)
378
- max_header_ix = cols;
379
-
380
- if (max_header_ix > data->header_name_count)
381
- data->header_name_count = max_header_ix;
382
-
383
- zsv_select_header_finish(data);
384
- }
385
-
386
- static void zsv_select_cleanup(struct zsv_select_data *data) {
387
- if (data->opts->stream && data->opts->stream != stdin)
388
- fclose(data->opts->stream);
389
-
390
- zsv_writer_delete(data->csv_writer);
391
- zsv_select_search_str_delete(data->search_strings);
392
- #ifdef HAVE_PCRE2_8
393
- zsv_select_regexs_delete(data->search_regexs);
394
- #endif
395
-
396
- if (data->distinct == ZSV_SELECT_DISTINCT_MERGE) {
397
- for (unsigned int i = 0; i < data->output_cols_count; i++) {
398
- for (struct zsv_select_uint_list *next, *ix = data->out2in[i].merge.indexes; ix; ix = next) {
399
- next = ix->next;
400
- free(ix);
401
- }
402
- }
403
- }
404
- free(data->out2in);
405
-
406
- for (unsigned int i = 0; i < data->header_name_count; i++)
407
- free(data->header_names[i]);
408
- free(data->header_names);
409
-
410
- free(data->fixed.offsets);
411
-
412
- #ifndef ZSV_NO_PARALLEL
413
- if (data->run_in_parallel)
414
- zsv_parallel_data_delete(data->parallel_data);
415
- #endif
416
- }
417
-
418
/*
 * Consume the value that follows the current command-line option.
 *
 * Advances arg_i to the next argument. If there is none, prints an error
 * naming the option, stores the result of zsv_printerr() in stat and jumps
 * to zsv_select_main_done. Otherwise assigns conv_func(argv[arg_i]) to tgt.
 * Expects arg_i, argc, argv, stat and the label zsv_select_main_done to be
 * in scope at the point of use.
 */
#define ARG_require_val(tgt, conv_func)                                                                                \
  do {                                                                                                                 \
    arg_i++;                                                                                                           \
    if (arg_i >= argc) {                                                                                               \
      stat = zsv_printerr(1, "%s option requires parameter", argv[arg_i - 1]);                                         \
      goto zsv_select_main_done;                                                                                       \
    }                                                                                                                  \
    tgt = conv_func(argv[arg_i]);                                                                                      \
  } while (0)
426
-
427
- #ifndef ZSV_NO_PARALLEL
428
- static int zsv_merge_worker_outputs(struct zsv_select_data *data, FILE *dest_stream) {
429
- if (!data->run_in_parallel || !data->parallel_data)
430
- return 0;
431
-
432
- fflush(dest_stream);
433
- #ifdef __linux__
434
- int out_fd = fileno(dest_stream);
435
- #endif
436
- int status = 0;
437
-
438
- for (unsigned int i = 0; i < data->num_chunks - 1; i++) {
439
- pthread_join(data->parallel_data->threads[i], NULL);
440
-
441
- struct zsv_chunk_data *next_chunk = &data->parallel_data->chunk_data[i + 1];
442
- off_t actual_next_row_start =
443
- i == 0 ? data->next_row_start : data->parallel_data->chunk_data[i].actual_next_row_start;
444
- off_t expected_next_row_start = next_chunk->start_offset;
445
- if (actual_next_row_start > expected_next_row_start) {
446
- if (data->opts->verbose) {
447
- fprintf(stderr, "Chunk overlap detected (Prev End: %zu, Next Start: %zu). Reprocessing chunk %d.\n",
448
- (size_t)actual_next_row_start, (size_t)expected_next_row_start, i + 1);
449
- }
450
-
451
- // clean up invalid results from the worker thread
452
- zsv_chunk_data_clear_output(next_chunk);
453
-
454
- // adjust the start offset to the actual next row start
455
- next_chunk->start_offset = actual_next_row_start;
456
-
457
- // reprocess synchronously on the main thread
458
- zsv_select_process_chunk_internal(next_chunk);
459
-
460
- if (next_chunk->status != zsv_status_ok) // reprocessing failed!
461
- status = zsv_status_error;
462
- }
463
- }
464
-
465
- // join all of the output files into a single output file
466
- for (unsigned int i = 1; i < data->num_chunks && status == 0; i++) {
467
- struct zsv_chunk_data *c = &data->parallel_data->chunk_data[i];
468
- if (c->skip)
469
- continue;
470
- #ifdef __linux__
471
- int in_fd = open(c->tmp_output_filename, O_RDONLY);
472
- if (in_fd < 0) {
473
- zsv_printerr(1, "Error opening chunk %s: %s", c->tmp_output_filename, strerror(errno));
474
- status = zsv_status_error;
475
- break;
476
- }
477
-
478
- struct stat st;
479
- if (fstat(in_fd, &st) == 0) {
480
- long copied = zsv_concatenate_copy(out_fd, in_fd, st.st_size);
481
- if (copied != st.st_size) {
482
- zsv_printerr(1, "Warning: Partial copy chunk %d (%lli/%lli)", i, copied, (long long)st.st_size);
483
- status = zsv_status_error;
484
- }
485
- } else {
486
- status = zsv_status_error;
487
- }
488
- close(in_fd);
489
- #else
490
- zsv_memfile_rewind(c->tmp_f);
491
- if (zsv_copy_filelike_ptr(
492
- c->tmp_f, (size_t(*)(void *restrict ptr, size_t size, size_t nitems, void *restrict stream))zsv_memfile_read,
493
- dest_stream,
494
- (size_t(*)(const void *restrict ptr, size_t size, size_t nitems, void *restrict stream))fwrite)) {
495
- perror("zsv temp mem file");
496
- status = zsv_status_error;
497
- }
498
- #endif
499
- }
500
- return status;
501
- }
502
- #endif
503
-
504
- int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts,
505
- struct zsv_prop_handler *custom_prop_handler) {
506
- if (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
507
- zsv_select_usage();
508
- return zsv_status_ok;
509
- }
510
-
511
- struct zsv_select_data data = {0};
512
- data.opts = opts;
513
- struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
514
- int col_index_arg_i = 0;
515
- unsigned char *preview_buff = NULL;
516
- size_t preview_buff_len = 0;
517
- enum zsv_status stat = zsv_status_ok;
518
-
519
- for (int arg_i = 1; stat == zsv_status_ok && arg_i < argc; arg_i++) {
520
- const char *arg = argv[arg_i];
521
- if (!strcmp(arg, "--")) {
522
- col_index_arg_i = arg_i + 1;
523
- break;
524
- }
525
-
526
- if (!strcmp(arg, "-b") || !strcmp(arg, "--with-bom"))
527
- writer_opts.with_bom = 1;
528
- else if (!strcmp(arg, "--fixed-auto-max-lines"))
529
- ARG_require_val(data.fixed.max_lines, atoi);
530
- else if (!strcmp(arg, "--fixed-auto"))
531
- data.fixed.autodetect = 1;
532
- else if (!strcmp(arg, "--fixed")) {
533
- if (++arg_i >= argc) {
534
- stat = zsv_printerr(1, "--fixed requires val");
535
- goto zsv_select_main_done;
536
- }
537
- data.fixed.count = 1;
538
- for (const char *s = argv[arg_i]; *s; s++)
539
- if (*s == ',')
540
- data.fixed.count++;
541
- free(data.fixed.offsets);
542
- data.fixed.offsets = calloc(data.fixed.count, sizeof(*data.fixed.offsets));
543
- if (!data.fixed.offsets) {
544
- stat = zsv_printerr(1, "Out of memory!");
545
- goto zsv_select_main_done;
546
- }
547
- size_t count = 0;
548
- char *dup = strdup(argv[arg_i]), *tok;
549
- for (tok = strtok(dup, ","); tok && count < data.fixed.count; tok = strtok(NULL, ",")) {
550
- if (sscanf(tok, "%zu", &data.fixed.offsets[count++]) != 1)
551
- stat = zsv_printerr(1, "Invalid offset: %s", tok);
552
- }
553
- free(dup);
554
- } else if (!strcmp(arg, "--distinct"))
555
- data.distinct = 1;
556
- else if (!strcmp(arg, "--merge"))
557
- data.distinct = ZSV_SELECT_DISTINCT_MERGE;
558
- else if (!strcmp(arg, "-o") || !strcmp(arg, "--output")) {
559
- if (writer_opts.stream && writer_opts.stream != stdout)
560
- stat = zsv_printerr(1, "Output specified twice");
561
- else {
562
- ARG_require_val(arg, (const char *));
563
- if (!(writer_opts.stream = fopen(arg, "wb")))
564
- stat = zsv_printerr(1, "Unable to open %s", arg);
565
- }
566
- } else if (!strcmp(arg, "-N") || !strcmp(arg, "--line-number"))
567
- data.prepend_line_number = 1;
568
- else if (!strcmp(arg, "-n"))
569
- data.use_header_indexes = 1;
570
- else if (!strcmp(arg, "-s") || !strcmp(arg, "--search")) {
571
- const char *v;
572
- ARG_require_val(v, (const char *));
573
- zsv_select_add_search(&data, v);
574
- }
575
- #ifdef HAVE_PCRE2_8
576
- else if (!strcmp(arg, "--regex-search")) {
577
- const char *v;
578
- ARG_require_val(v, (const char *));
579
- zsv_select_add_regex(&data, v);
580
- }
581
- #endif
582
- else if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose"))
583
- data.verbose = 1;
584
- else if (!strcmp(arg, "--unescape"))
585
- data.unescape = 1;
586
- else if (!strcmp(arg, "-w") || !strcmp(arg, "--whitespace-clean"))
587
- data.clean_white = 1;
588
- else if (!strcmp(arg, "--whitespace-clean-no-newline")) {
589
- data.clean_white = 1;
590
- data.whitespace_clean_flags = 1;
591
- } else if (!strcmp(arg, "-W") || !strcmp(arg, "--no-trim"))
592
- data.no_trim_whitespace = 1;
593
- else if (!strcmp(arg, "--sample-every"))
594
- ARG_require_val(data.sample_every_n, atoi);
595
- else if (!strcmp(arg, "--sample-pct"))
596
- ARG_require_val(data.sample_pct, atof);
597
- else if (!strcmp(arg, "--prepend-header")) {
598
- int err = 0;
599
- data.prepend_header = zsv_next_arg(++arg_i, argc, argv, &err);
600
- if (err)
601
- stat = zsv_status_error;
602
- } else if (!strcmp(arg, "--no-header"))
603
- data.no_header = 1;
604
- else if (!strcmp(arg, "-H") || !strcmp(arg, "--head")) {
605
- int val;
606
- ARG_require_val(val, atoi);
607
- data.data_rows_limit = val + 1;
608
- } else if (!strcmp(arg, "-D") || !strcmp(arg, "--skip-data"))
609
- ARG_require_val(data.skip_data_rows, atoi);
610
- #ifndef ZSV_NO_PARALLEL
611
- else if (!strcmp(arg, "-j") || !strcmp(arg, "--jobs"))
612
- ARG_require_val(data.num_chunks, atoi);
613
- else if (!strcmp(arg, "--parallel")) {
614
- data.num_chunks = zsv_get_number_of_cores();
615
- if (data.num_chunks < 2) {
616
- fprintf(stderr, "Warning: --parallel specified but only one core found; using -j 4 instead");
617
- data.num_chunks = 4;
618
- }
619
- }
620
- #endif
621
- else if (!strcmp(arg, "-e")) {
622
- const char *v;
623
- ARG_require_val(v, (const char *));
624
- data.embedded_lineend = *v;
625
- } else if (!strcmp(arg, "-x")) {
626
- const char *v;
627
- ARG_require_val(v, (const char *));
628
- zsv_select_add_exclusion(&data, v);
629
- } else if (*arg == '-')
630
- stat = zsv_printerr(1, "Unrecognized argument: %s", arg);
631
- else if (data.input_path)
632
- stat = zsv_printerr(1, "Input specified twice");
633
- else
634
- data.input_path = arg;
635
- }
636
-
637
- if (stat != zsv_status_ok)
638
- goto zsv_select_main_done;
639
-
640
- // configuration & setup
641
- if (!writer_opts.stream)
642
- writer_opts.stream = stdout;
643
- if (data.sample_pct)
644
- srand(time(0));
645
- if (data.use_header_indexes && (stat = zsv_select_check_exclusions_are_indexes(&data)))
646
- goto zsv_select_main_done;
647
-
648
- #ifndef ZSV_NO_PARALLEL
649
- if (data.num_chunks > 1) {
650
- enum zsv_chunk_status chstat = zsv_chunkable(data.input_path, data.opts);
651
- if (chstat != zsv_chunk_status_ok) {
652
- stat = zsv_printerr(1, "%s", zsv_chunk_status_str(chstat));
653
- goto zsv_select_main_done;
654
- }
655
- }
656
- #endif
657
-
658
- // input stream
659
- if (data.input_path) {
660
- if (!(data.opts->stream = fopen(data.input_path, "rb")))
661
- stat = zsv_printerr(1, "Cannot open %s", data.input_path);
662
- } else {
663
- #ifdef NO_STDIN
664
- stat = zsv_printerr(1, "Input file required");
665
- goto zsv_select_main_done;
666
- #else
667
- data.opts->stream = stdin;
668
- #endif
669
- }
670
-
671
- // auto-fixed column detection
672
- if (data.fixed.autodetect) { // fixed-auto flag
673
- if (data.fixed.count)
674
- stat = zsv_printerr(1, "--fixed-auto cannot be used with --fixed");
675
- else {
676
- size_t bsz = 1024 * 256;
677
- if (!(preview_buff = calloc(bsz, 1)))
678
- stat = zsv_status_memory;
679
- else
680
- stat =
681
- auto_detect_fixed_column_sizes(&data.fixed, data.opts, preview_buff, bsz, &preview_buff_len, opts->verbose);
682
- }
683
- }
684
- if (stat != zsv_status_ok)
685
- goto zsv_select_main_done;
686
-
687
- // parser initialization
688
- if (col_index_arg_i) {
689
- data.col_argv = &argv[col_index_arg_i];
690
- data.col_argc = argc - col_index_arg_i;
691
- }
692
-
693
- data.header_names = calloc(data.opts->max_columns, sizeof(*data.header_names));
694
- data.out2in = calloc(data.opts->max_columns, sizeof(*data.out2in));
695
- data.csv_writer = zsv_writer_new(&writer_opts);
696
-
697
- if (!data.header_names || !data.out2in || !data.csv_writer) {
698
- stat = zsv_status_memory;
699
- goto zsv_select_main_done;
700
- }
701
-
702
- // execution
703
- data.opts->row_handler = zsv_select_header_row;
704
- data.opts->ctx = &data;
705
-
706
- if (zsv_new_with_properties(data.opts, custom_prop_handler, data.input_path, &data.parser) == zsv_status_ok) {
707
- data.any_clean = !data.no_trim_whitespace || data.clean_white || data.embedded_lineend || data.unescape;
708
-
709
- // apply fixed offsets (whether from --fixed arg or --fixed-auto detection)
710
- if (data.fixed.count && zsv_set_fixed_offsets(data.parser, data.fixed.count, data.fixed.offsets) != zsv_status_ok)
711
- data.cancelled = 1;
712
-
713
- unsigned char writer_buff[512];
714
- zsv_writer_set_temp_buff(data.csv_writer, writer_buff, sizeof(writer_buff));
715
-
716
- zsv_handle_ctrl_c_signal();
717
-
718
- enum zsv_status p_stat = zsv_status_ok;
719
- if (preview_buff_len)
720
- p_stat = zsv_parse_bytes(data.parser, preview_buff, preview_buff_len);
721
-
722
- while (p_stat == zsv_status_ok && !zsv_signal_interrupted && !data.cancelled)
723
- p_stat = zsv_parse_more(data.parser);
724
-
725
- if (p_stat == zsv_status_no_more_input) {
726
- zsv_finish(data.parser);
727
- #ifndef ZSV_NO_PARALLEL
728
- // unlikely, but maybe conceivable if chunk split was not accurate and
729
- // a correctly-split chunk's last row entirely ate the next incorrectly-split chunk
730
- if (data.run_in_parallel && !data.next_row_start)
731
- data.next_row_start = zsv_cum_scanned_length(data.parser) + 1;
732
- #endif
733
- }
734
- zsv_delete(data.parser);
735
-
736
- #ifndef ZSV_NO_PARALLEL
737
- if (data.run_in_parallel) {
738
- // explicitly flush and delete main writer before merge which uses raw fd
739
- zsv_writer_delete(data.csv_writer);
740
- data.csv_writer = NULL;
741
- if (zsv_merge_worker_outputs(&data, writer_opts.stream) != 0)
742
- stat = zsv_status_error;
743
- }
744
- #endif
745
- }
746
-
747
- zsv_select_main_done:
748
- free(preview_buff);
749
- zsv_select_cleanup(&data);
750
- if (writer_opts.stream && writer_opts.stream != stdout)
751
- fclose(writer_opts.stream);
752
- return stat;
753
- }