zsv 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +36 -0
  3. data/LICENSE +21 -0
  4. data/README.md +311 -0
  5. data/ext/zsv/common.h +34 -0
  6. data/ext/zsv/extconf.rb +137 -0
  7. data/ext/zsv/options.c +126 -0
  8. data/ext/zsv/options.h +31 -0
  9. data/ext/zsv/options_internal.h +8 -0
  10. data/ext/zsv/parser.c +300 -0
  11. data/ext/zsv/parser.h +62 -0
  12. data/ext/zsv/row.c +122 -0
  13. data/ext/zsv/row.h +39 -0
  14. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
  15. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
  16. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
  17. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
  18. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
  19. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
  20. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
  21. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
  22. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
  23. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
  24. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
  25. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
  26. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
  27. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
  28. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
  29. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
  30. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
  31. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
  32. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
  33. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
  34. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
  35. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
  36. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
  37. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
  38. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
  39. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
  40. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
  41. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
  42. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
  43. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
  84. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
  85. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
  86. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
  87. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
  88. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
  89. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
  90. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
  91. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
  92. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
  93. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
  94. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
  95. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
  96. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
  97. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
  98. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
  99. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
  100. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
  106. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
  107. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
  108. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
  109. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
  110. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
  111. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
  112. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
  113. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
  141. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
  142. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
  143. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
  144. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
  145. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
  146. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
  147. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
  148. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
  182. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
  183. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
  184. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
  185. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
  186. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
  187. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
  188. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
  189. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
  190. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
  191. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
  192. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
  193. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
  194. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
  195. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
  196. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
  197. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
  224. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
  225. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
  226. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
  227. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
  228. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
  229. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
  230. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
  231. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
  232. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
  233. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
  234. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
  235. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
  236. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
  237. data/ext/zsv/zsv_ext.c +343 -0
  238. data/lib/zsv/version.rb +5 -0
  239. data/lib/zsv.rb +81 -0
  240. metadata +340 -0
@@ -0,0 +1,753 @@
1
+ /*
2
+ * Copyright (C) 2025 Liquidaty and zsv contributors. All rights reserved.
3
+ *
4
+ * This file is part of zsv/lib, distributed under the MIT license as defined at
5
+ * https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ #include <stdio.h>
9
+ #include <assert.h>
10
+ #ifdef _WIN32
11
+ #define _CRT_RAND_S /* for random number generator, used when sampling. must come before including stdlib.h */
12
+ #else
13
+ #include <sys/types.h> // off_t
14
+ #endif
15
+ #include <stdlib.h>
16
+ #include <stdint.h>
17
+ #include <string.h>
18
+ #include <ctype.h>
19
+ #include <time.h>
20
+ #include <stdarg.h>
21
+
22
+ // Added for pthreads and parallel I/O management
23
+ #include <pthread.h>
24
+ #include <string.h> // memcpy, free, etc.
25
+
26
+ #define ZSV_COMMAND select
27
+ #include "zsv_command.h"
28
+
29
+ #include <zsv/utils/writer.h>
30
+ #include <zsv/utils/utf8.h>
31
+ #include <zsv/utils/string.h>
32
+ #include <zsv/utils/mem.h>
33
+ #include <zsv/utils/memmem.h>
34
+ #include <zsv/utils/arg.h>
35
+ #include <zsv/utils/os.h>
36
+ #include <zsv/utils/file.h>
37
+ #include "utils/chunk.h"
38
+
39
+ #include "select/internal.h" // various defines and structs
40
+ #include "select/usage.c" // zsv_select_usage()
41
+ #include "select/rand.c" // demo_random_bw_1_and_100()
42
+ #include "select/fixed.c" // auto_detect_fixed_column_sizes()
43
+ #include "utils/cat.c"
44
+
45
+ // zsv_select_add_search(), zsv_select_search_str_delete()
46
+ #include "select/search.c"
47
+
48
+ // struct zsv_select_regex, zsv_select_add_regex(), zsv_select_regexs_delete()
49
+ #include "select/regex.c"
50
+
51
+ // zsv_select_cell_clean(), zsv_select_row_search_hit()
52
+ #include "select/processing.c"
53
+
54
+ // zsv_select_add_exclusion(), zsv_select_get_header_name(),
55
+ // zsv_select_check_exclusions_are_indexes()
56
+ #include "select/selection.c"
57
+
58
+ #ifndef ZSV_NO_PARALLEL
59
+ #include "select/parallel.c" // zsv_parallel_data_new(), zsv_parallel_data_delete()
60
+
61
+ #define ZSV_SELECT_PARALLEL_MIN_BYTES (1024 * 1024 * 2) // don't parallelize if < 2 MB of data (after header)
62
+ #define ZSV_SELECT_PARALLEL_BUFFER_SZ (1024 * 1024 * 8) // to do: make customizable or dynamic
63
+
64
+ static void zsv_select_data_row(void *ctx);
65
+
66
+ static void zsv_select_data_row_parallel_done(void *ctx) {
67
+ struct zsv_select_data *data = ctx;
68
+ data->next_row_start = zsv_cum_scanned_length(data->parser) - zsv_row_length_raw_bytes(data->parser);
69
+ zsv_abort(data->parser);
70
+ data->cancelled = 1;
71
+ }
72
+ static void zsv_select_data_row_parallel(void *ctx) {
73
+ struct zsv_select_data *data = ctx;
74
+ zsv_select_data_row(ctx);
75
+
76
+ if (UNLIKELY((off_t)zsv_cum_scanned_length(data->parser) >= data->end_offset_limit)) {
77
+ // parse one more row to get accurate next-row start
78
+ zsv_set_row_handler(data->parser, zsv_select_data_row_parallel_done);
79
+ }
80
+ }
81
+
82
+ static void *zsv_select_process_chunk_internal(struct zsv_chunk_data *cdata) {
83
+ if (cdata->start_offset >= cdata->end_offset) {
84
+ cdata->skip = 1;
85
+ return NULL;
86
+ }
87
+
88
+ struct zsv_select_data data = {0}; // local, non-shared zsv_select_data instance
89
+
90
+ // Copy necessary setup data from the global context
91
+ memcpy(&data, cdata->opts->ctx, sizeof(data));
92
+ data.parallel_data = NULL; // clear parallel data pointer in local copy
93
+ data.cancelled = 0; // necessary in case we are re-running due to incorrect chunk start
94
+
95
+ #ifdef HAVE_PCRE2_8
96
+ // duplicate data.search_regexs for thread safety
97
+ if (data.search_regexs)
98
+ data.search_regexs = zsv_select_regexs_dup(data.search_regexs);
99
+ #endif
100
+
101
+ struct zsv_opts opts = {0};
102
+ opts.max_columns = cdata->opts->max_columns;
103
+ opts.max_row_size = cdata->opts->max_row_size;
104
+ opts.delimiter = cdata->opts->delimiter;
105
+ opts.no_quotes = cdata->opts->no_quotes;
106
+ opts.verbose = cdata->opts->verbose;
107
+ opts.malformed_utf8_replace = cdata->opts->malformed_utf8_replace;
108
+ opts.errprintf = cdata->opts->errprintf;
109
+ opts.errf = cdata->opts->errf;
110
+ opts.errclose = cdata->opts->errclose;
111
+ opts.progress = cdata->opts->progress;
112
+
113
+ // set up input
114
+ FILE *stream = fopen(data.input_path, "rb");
115
+ if (!stream) {
116
+ cdata->status = zsv_status_error;
117
+ return NULL;
118
+ }
119
+ fseeko(stream, cdata->start_offset, SEEK_SET);
120
+
121
+ // set up output
122
+ struct zsv_csv_writer_options writer_opts = {0};
123
+
124
+ #ifdef __linux__
125
+ cdata->tmp_output_filename = zsv_get_temp_filename("zsvselect");
126
+ writer_opts.stream = fopen(cdata->tmp_output_filename, "wb");
127
+ #else
128
+ if (!(cdata->tmp_f = zsv_memfile_open(ZSV_SELECT_PARALLEL_BUFFER_SZ)) &&
129
+ !(cdata->tmp_f = zsv_memfile_open(ZSV_SELECT_PARALLEL_BUFFER_SZ / 2)) &&
130
+ !(cdata->tmp_f = zsv_memfile_open(ZSV_SELECT_PARALLEL_BUFFER_SZ / 4)) &&
131
+ !(cdata->tmp_f = zsv_memfile_open(ZSV_SELECT_PARALLEL_BUFFER_SZ / 8)))
132
+ cdata->tmp_f = zsv_memfile_open(0);
133
+ writer_opts.stream = cdata->tmp_f;
134
+ writer_opts.write = (size_t(*)(const void *restrict, size_t, size_t, void *restrict))zsv_memfile_write;
135
+ #endif
136
+
137
+ if (!writer_opts.stream) {
138
+ cdata->status = zsv_status_memory;
139
+ fclose(stream);
140
+ return NULL;
141
+ }
142
+ data.csv_writer = zsv_writer_new(&writer_opts);
143
+
144
+ // initialize parser
145
+ opts.stream = stream;
146
+ opts.row_handler = zsv_select_data_row_parallel;
147
+ opts.ctx = &data;
148
+ data.end_offset_limit = cdata->end_offset - cdata->start_offset; // set chunk boundary
149
+ data.parser = zsv_new(&opts);
150
+
151
+ // process
152
+ enum zsv_status status = zsv_status_ok;
153
+ while (status == zsv_status_ok && !zsv_signal_interrupted && !data.cancelled)
154
+ status = zsv_parse_more(data.parser);
155
+
156
+ #ifndef ZSV_NOPARALLEL
157
+ if (!data.next_row_start)
158
+ // unlikely, but maybe conceivable if chunk split was not accurate and
159
+ // a correctly-split chunk's last row entirely ate the next incorrectly-split chunk
160
+ data.next_row_start = zsv_cum_scanned_length(data.parser) + 1;
161
+ #endif
162
+
163
+ // clean up
164
+ zsv_delete(data.parser);
165
+ #ifdef HAVE_PCRE2_8
166
+ zsv_select_regexs_delete(data.search_regexs);
167
+ #endif
168
+ fflush(stream);
169
+ fclose(stream);
170
+ zsv_writer_delete(data.csv_writer);
171
+ #ifdef __linux__
172
+ fclose(writer_opts.stream);
173
+ #endif
174
+ cdata->actual_next_row_start = data.next_row_start + cdata->start_offset;
175
+ cdata->status = zsv_status_ok;
176
+ return NULL;
177
+ }
178
+
179
/*
 * pthread entry point: `arg` is the struct zsv_chunk_data for the chunk to
 * process. Simply forwards to zsv_select_process_chunk_internal() and returns
 * its result.
 */
static void *zsv_select_process_chunk(void *arg) {
  struct zsv_chunk_data *chunk = arg;
  return zsv_select_process_chunk_internal(chunk);
}
183
+ #endif // ZSV_NO_PARALLEL
184
+
185
+ // zsv_select_output_data_row(): output row data (No change needed)
186
+ static void zsv_select_output_data_row(struct zsv_select_data *data) {
187
+ unsigned int cnt = data->output_cols_count;
188
+ char first = 1;
189
+ if (data->prepend_line_number) {
190
+ zsv_writer_cell_zu(data->csv_writer, first, data->data_row_count);
191
+ first = 0;
192
+ }
193
+
194
+ /* print data row */
195
+ for (unsigned int i = 0; i < cnt; i++) { // for each output column
196
+ unsigned int in_ix = data->out2in[i].ix;
197
+ struct zsv_cell cell = zsv_get_cell(data->parser, in_ix);
198
+ if (UNLIKELY(data->any_clean != 0)) {
199
+ // leading/trailing white may have been converted to NULL for regex search
200
+ while (cell.len && *cell.str == '\0')
201
+ cell.str++, cell.len--;
202
+ while (cell.len && cell.str[cell.len - 1] == '\0')
203
+ cell.len--;
204
+ cell.str = zsv_select_cell_clean(data, cell.str, &cell.quoted, &cell.len);
205
+ }
206
+ if (VERY_UNLIKELY(data->distinct == ZSV_SELECT_DISTINCT_MERGE)) {
207
+ if (UNLIKELY(cell.len == 0)) {
208
+ for (struct zsv_select_uint_list *ix = data->out2in[i].merge.indexes; ix; ix = ix->next) {
209
+ unsigned int m_ix = ix->value;
210
+ cell = zsv_get_cell(data->parser, m_ix);
211
+ if (cell.len) {
212
+ if (UNLIKELY(data->any_clean != 0))
213
+ cell.str = zsv_select_cell_clean(data, cell.str, &cell.quoted, &cell.len);
214
+ if (cell.len)
215
+ break;
216
+ }
217
+ }
218
+ }
219
+ }
220
+ zsv_writer_cell(data->csv_writer, first, cell.str, cell.len, cell.quoted);
221
+ first = 0;
222
+ }
223
+ }
224
+
225
+ static void zsv_select_data_row(void *ctx) {
226
+ struct zsv_select_data *data = ctx;
227
+ if (UNLIKELY(zsv_cell_count(data->parser) == 0 || data->cancelled))
228
+ return;
229
+
230
+ data->data_row_count++;
231
+
232
+ // check if we should skip this row
233
+ data->skip_this_row = 0;
234
+ if (UNLIKELY(data->skip_data_rows)) {
235
+ data->skip_data_rows--;
236
+ data->skip_this_row = 1;
237
+ } else if (UNLIKELY(data->sample_every_n || data->sample_pct)) {
238
+ data->skip_this_row = 1;
239
+ if (data->sample_every_n && data->data_row_count % data->sample_every_n == 1)
240
+ data->skip_this_row = 0;
241
+ if (data->sample_pct && demo_random_bw_1_and_100() <= data->sample_pct)
242
+ data->skip_this_row = 0;
243
+ }
244
+
245
+ if (LIKELY(!data->skip_this_row)) {
246
+ // if we have a search filter, check that
247
+ char skip = 0;
248
+ skip = !zsv_select_row_search_hit(data);
249
+ if (!skip) {
250
+
251
+ // print the data row
252
+ zsv_select_output_data_row(data);
253
+ if (UNLIKELY(data->data_rows_limit > 0))
254
+ if (data->data_row_count + 1 >= data->data_rows_limit)
255
+ data->cancelled = 1;
256
+ }
257
+ }
258
+ if (data->data_row_count % 25000 == 0 && data->verbose)
259
+ fprintf(stderr, "Processed %zu rows\n", data->data_row_count);
260
+ }
261
+
262
+ static void zsv_select_print_header_row(struct zsv_select_data *data) {
263
+ if (data->no_header)
264
+ return;
265
+ zsv_writer_cell_prepend(data->csv_writer, (const unsigned char *)data->prepend_header);
266
+ if (data->prepend_line_number)
267
+ zsv_writer_cell_s(data->csv_writer, 1, (const unsigned char *)"#", 0);
268
+ for (unsigned int i = 0; i < data->output_cols_count; i++) {
269
+ unsigned char *header_name = zsv_select_get_header_name(data, data->out2in[i].ix);
270
+ zsv_writer_cell_s(data->csv_writer, i == 0 && !data->prepend_line_number, header_name, 1);
271
+ }
272
+ zsv_writer_cell_prepend(data->csv_writer, NULL);
273
+ }
274
+
275
+ #ifndef ZSV_NO_PARALLEL
276
+ static int zsv_setup_parallel_chunks(struct zsv_select_data *data, const char *path, size_t header_row_end) {
277
+ if (data->num_chunks <= 1 || !path || !strcmp(path, "-")) {
278
+ data->run_in_parallel = 0;
279
+ return 0;
280
+ }
281
+
282
+ struct zsv_chunk_position *offsets =
283
+ zsv_guess_file_chunks(path, data->num_chunks, ZSV_SELECT_PARALLEL_MIN_BYTES, header_row_end + 1
284
+ #ifndef ZSV_NO_ONLY_CRLF
285
+ ,
286
+ data->opts->only_crlf_rowend
287
+ #endif
288
+ );
289
+ if (!offsets)
290
+ return -1; // fall back to serial
291
+
292
+ if (!(data->parallel_data = zsv_parallel_data_new(data->num_chunks))) {
293
+ zsv_free_chunks(offsets);
294
+ fprintf(stderr, "Insufficient memory to parallelize!\n");
295
+ return zsv_status_memory;
296
+ }
297
+
298
+ data->run_in_parallel = 1;
299
+ data->parallel_data->main_data = data;
300
+ data->end_offset_limit = offsets[0].end;
301
+
302
+ for (unsigned int i = 0; i < data->num_chunks; i++) {
303
+ data->parallel_data->chunk_data[i].start_offset = offsets[i].start;
304
+ data->parallel_data->chunk_data[i].end_offset = offsets[i].end;
305
+ if (data->opts->verbose)
306
+ fprintf(stderr, "Chunk %i: %zu - %zu\n", i, (size_t)offsets[i].start, (size_t)offsets[i].end);
307
+ }
308
+ zsv_free_chunks(offsets);
309
+ return 0;
310
+ }
311
+ #endif // ZSV_NO_PARALLEL
312
+
313
+ static void zsv_select_header_finish(struct zsv_select_data *data) {
314
+ if (zsv_select_set_output_columns(data)) {
315
+ data->cancelled = 1;
316
+ return;
317
+ }
318
+ #ifndef ZSV_NO_PARALLEL
319
+ // set up parallelization; on error, fall back to serial
320
+ // TO DO: option to exit on error (instead of fall back)
321
+ if (data->input_path && data->num_chunks > 1) {
322
+ size_t header_row_end = zsv_cum_scanned_length(data->parser);
323
+ zsv_setup_parallel_chunks(data, data->input_path, header_row_end);
324
+ }
325
+ if (data->opts->verbose)
326
+ fprintf(stderr, "Running %s\n", data->run_in_parallel ? "parallel" : "single-threaded");
327
+
328
+ if (data->run_in_parallel) {
329
+ struct zsv_parallel_data *pdata = data->parallel_data;
330
+ zsv_select_print_header_row(data);
331
+
332
+ // start worker threads
333
+ for (unsigned int i = 1; i < data->num_chunks; i++) {
334
+ struct zsv_chunk_data *cdata = &pdata->chunk_data[i];
335
+ cdata->id = i;
336
+ cdata->opts = data->opts;
337
+
338
+ int create_status = pthread_create(&pdata->threads[i - 1], NULL, zsv_select_process_chunk, cdata);
339
+ if (create_status != 0) {
340
+ data->cancelled = 1;
341
+ zsv_printerr(1, "Error creating worker thread for chunk %d: %s", i, strerror(create_status));
342
+ return;
343
+ }
344
+ }
345
+
346
+ // main thread processes chunk 1
347
+ zsv_set_row_handler(data->parser, zsv_select_data_row_parallel);
348
+ } else
349
+ #endif
350
+ {
351
+ // no parallelization
352
+ zsv_select_print_header_row(data);
353
+ zsv_set_row_handler(data->parser, zsv_select_data_row);
354
+ }
355
+ }
356
+
357
+ static void zsv_select_header_row(void *ctx) {
358
+ struct zsv_select_data *data = ctx;
359
+
360
+ if (data->cancelled)
361
+ return;
362
+
363
+ unsigned int cols = zsv_cell_count(data->parser);
364
+ unsigned int max_header_ix = 0;
365
+ for (unsigned int i = 0; i < cols; i++) {
366
+ struct zsv_cell cell = zsv_get_cell(data->parser, i);
367
+ if (UNLIKELY(data->any_clean != 0))
368
+ cell.str = zsv_select_cell_clean(data, cell.str, &cell.quoted, &cell.len);
369
+ if (i < data->opts->max_columns) {
370
+ data->header_names[i] = zsv_memdup(cell.str, cell.len);
371
+ max_header_ix = i + 1;
372
+ }
373
+ }
374
+
375
+ // in case we want to make this an option later
376
+ char trim_trailing_columns = 1;
377
+ if (!trim_trailing_columns)
378
+ max_header_ix = cols;
379
+
380
+ if (max_header_ix > data->header_name_count)
381
+ data->header_name_count = max_header_ix;
382
+
383
+ zsv_select_header_finish(data);
384
+ }
385
+
386
+ static void zsv_select_cleanup(struct zsv_select_data *data) {
387
+ if (data->opts->stream && data->opts->stream != stdin)
388
+ fclose(data->opts->stream);
389
+
390
+ zsv_writer_delete(data->csv_writer);
391
+ zsv_select_search_str_delete(data->search_strings);
392
+ #ifdef HAVE_PCRE2_8
393
+ zsv_select_regexs_delete(data->search_regexs);
394
+ #endif
395
+
396
+ if (data->distinct == ZSV_SELECT_DISTINCT_MERGE) {
397
+ for (unsigned int i = 0; i < data->output_cols_count; i++) {
398
+ for (struct zsv_select_uint_list *next, *ix = data->out2in[i].merge.indexes; ix; ix = next) {
399
+ next = ix->next;
400
+ free(ix);
401
+ }
402
+ }
403
+ }
404
+ free(data->out2in);
405
+
406
+ for (unsigned int i = 0; i < data->header_name_count; i++)
407
+ free(data->header_names[i]);
408
+ free(data->header_names);
409
+
410
+ free(data->fixed.offsets);
411
+
412
+ #ifndef ZSV_NO_PARALLEL
413
+ if (data->run_in_parallel)
414
+ zsv_parallel_data_delete(data->parallel_data);
415
+ #endif
416
+ }
417
+
418
/*
 * ARG_require_val(tgt, conv_func): consume the next command-line argument as
 * the value of the current option.
 *
 * Relies on `arg_i`, `argc`, `argv`, `stat` and the label
 * `zsv_select_main_done` being available in the expanding scope. Advances
 * arg_i; if no argument is left, reports the option name through
 * zsv_printerr(), stores the result in `stat` and jumps to
 * zsv_select_main_done. Otherwise assigns conv_func(argv[arg_i]) to tgt.
 */
#define ARG_require_val(tgt, conv_func)                                                                                \
  do {                                                                                                                 \
    ++arg_i;                                                                                                           \
    if (arg_i >= argc) {                                                                                               \
      stat = zsv_printerr(1, "%s option requires parameter", argv[arg_i - 1]);                                         \
      goto zsv_select_main_done;                                                                                       \
    }                                                                                                                  \
    (tgt) = conv_func(argv[arg_i]);                                                                                    \
  } while (0)
426
+
427
+ #ifndef ZSV_NO_PARALLEL
428
+ static int zsv_merge_worker_outputs(struct zsv_select_data *data, FILE *dest_stream) {
429
+ if (!data->run_in_parallel || !data->parallel_data)
430
+ return 0;
431
+
432
+ fflush(dest_stream);
433
+ #ifdef __linux__
434
+ int out_fd = fileno(dest_stream);
435
+ #endif
436
+ int status = 0;
437
+
438
+ for (unsigned int i = 0; i < data->num_chunks - 1; i++) {
439
+ pthread_join(data->parallel_data->threads[i], NULL);
440
+
441
+ struct zsv_chunk_data *next_chunk = &data->parallel_data->chunk_data[i + 1];
442
+ off_t actual_next_row_start =
443
+ i == 0 ? data->next_row_start : data->parallel_data->chunk_data[i].actual_next_row_start;
444
+ off_t expected_next_row_start = next_chunk->start_offset;
445
+ if (actual_next_row_start > expected_next_row_start) {
446
+ if (data->opts->verbose) {
447
+ fprintf(stderr, "Chunk overlap detected (Prev End: %zu, Next Start: %zu). Reprocessing chunk %d.\n",
448
+ (size_t)actual_next_row_start, (size_t)expected_next_row_start, i + 1);
449
+ }
450
+
451
+ // clean up invalid results from the worker thread
452
+ zsv_chunk_data_clear_output(next_chunk);
453
+
454
+ // adjust the start offset to the actual next row start
455
+ next_chunk->start_offset = actual_next_row_start;
456
+
457
+ // reprocess synchronously on the main thread
458
+ zsv_select_process_chunk_internal(next_chunk);
459
+
460
+ if (next_chunk->status != zsv_status_ok) // reprocessing failed!
461
+ status = zsv_status_error;
462
+ }
463
+ }
464
+
465
+ // join all of the output files into a single output file
466
+ for (unsigned int i = 1; i < data->num_chunks && status == 0; i++) {
467
+ struct zsv_chunk_data *c = &data->parallel_data->chunk_data[i];
468
+ if (c->skip)
469
+ continue;
470
+ #ifdef __linux__
471
+ int in_fd = open(c->tmp_output_filename, O_RDONLY);
472
+ if (in_fd < 0) {
473
+ zsv_printerr(1, "Error opening chunk %s: %s", c->tmp_output_filename, strerror(errno));
474
+ status = zsv_status_error;
475
+ break;
476
+ }
477
+
478
+ struct stat st;
479
+ if (fstat(in_fd, &st) == 0) {
480
+ long copied = zsv_concatenate_copy(out_fd, in_fd, st.st_size);
481
+ if (copied != st.st_size) {
482
+ zsv_printerr(1, "Warning: Partial copy chunk %d (%lli/%lli)", i, copied, (long long)st.st_size);
483
+ status = zsv_status_error;
484
+ }
485
+ } else {
486
+ status = zsv_status_error;
487
+ }
488
+ close(in_fd);
489
+ #else
490
+ zsv_memfile_rewind(c->tmp_f);
491
+ if (zsv_copy_filelike_ptr(
492
+ c->tmp_f, (size_t(*)(void *restrict ptr, size_t size, size_t nitems, void *restrict stream))zsv_memfile_read,
493
+ dest_stream,
494
+ (size_t(*)(const void *restrict ptr, size_t size, size_t nitems, void *restrict stream))fwrite)) {
495
+ perror("zsv temp mem file");
496
+ status = zsv_status_error;
497
+ }
498
+ #endif
499
+ }
500
+ return status;
501
+ }
502
+ #endif
503
+
504
+ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts,
505
+ struct zsv_prop_handler *custom_prop_handler) {
506
+ if (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
507
+ zsv_select_usage();
508
+ return zsv_status_ok;
509
+ }
510
+
511
+ struct zsv_select_data data = {0};
512
+ data.opts = opts;
513
+ struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
514
+ int col_index_arg_i = 0;
515
+ unsigned char *preview_buff = NULL;
516
+ size_t preview_buff_len = 0;
517
+ enum zsv_status stat = zsv_status_ok;
518
+
519
+ for (int arg_i = 1; stat == zsv_status_ok && arg_i < argc; arg_i++) {
520
+ const char *arg = argv[arg_i];
521
+ if (!strcmp(arg, "--")) {
522
+ col_index_arg_i = arg_i + 1;
523
+ break;
524
+ }
525
+
526
+ if (!strcmp(arg, "-b") || !strcmp(arg, "--with-bom"))
527
+ writer_opts.with_bom = 1;
528
+ else if (!strcmp(arg, "--fixed-auto-max-lines"))
529
+ ARG_require_val(data.fixed.max_lines, atoi);
530
+ else if (!strcmp(arg, "--fixed-auto"))
531
+ data.fixed.autodetect = 1;
532
+ else if (!strcmp(arg, "--fixed")) {
533
+ if (++arg_i >= argc) {
534
+ stat = zsv_printerr(1, "--fixed requires val");
535
+ goto zsv_select_main_done;
536
+ }
537
+ data.fixed.count = 1;
538
+ for (const char *s = argv[arg_i]; *s; s++)
539
+ if (*s == ',')
540
+ data.fixed.count++;
541
+ free(data.fixed.offsets);
542
+ data.fixed.offsets = calloc(data.fixed.count, sizeof(*data.fixed.offsets));
543
+ if (!data.fixed.offsets) {
544
+ stat = zsv_printerr(1, "Out of memory!");
545
+ goto zsv_select_main_done;
546
+ }
547
+ size_t count = 0;
548
+ char *dup = strdup(argv[arg_i]), *tok;
549
+ for (tok = strtok(dup, ","); tok && count < data.fixed.count; tok = strtok(NULL, ",")) {
550
+ if (sscanf(tok, "%zu", &data.fixed.offsets[count++]) != 1)
551
+ stat = zsv_printerr(1, "Invalid offset: %s", tok);
552
+ }
553
+ free(dup);
554
+ } else if (!strcmp(arg, "--distinct"))
555
+ data.distinct = 1;
556
+ else if (!strcmp(arg, "--merge"))
557
+ data.distinct = ZSV_SELECT_DISTINCT_MERGE;
558
+ else if (!strcmp(arg, "-o") || !strcmp(arg, "--output")) {
559
+ if (writer_opts.stream && writer_opts.stream != stdout)
560
+ stat = zsv_printerr(1, "Output specified twice");
561
+ else {
562
+ ARG_require_val(arg, (const char *));
563
+ if (!(writer_opts.stream = fopen(arg, "wb")))
564
+ stat = zsv_printerr(1, "Unable to open %s", arg);
565
+ }
566
+ } else if (!strcmp(arg, "-N") || !strcmp(arg, "--line-number"))
567
+ data.prepend_line_number = 1;
568
+ else if (!strcmp(arg, "-n"))
569
+ data.use_header_indexes = 1;
570
+ else if (!strcmp(arg, "-s") || !strcmp(arg, "--search")) {
571
+ const char *v;
572
+ ARG_require_val(v, (const char *));
573
+ zsv_select_add_search(&data, v);
574
+ }
575
+ #ifdef HAVE_PCRE2_8
576
+ else if (!strcmp(arg, "--regex-search")) {
577
+ const char *v;
578
+ ARG_require_val(v, (const char *));
579
+ zsv_select_add_regex(&data, v);
580
+ }
581
+ #endif
582
+ else if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose"))
583
+ data.verbose = 1;
584
+ else if (!strcmp(arg, "--unescape"))
585
+ data.unescape = 1;
586
+ else if (!strcmp(arg, "-w") || !strcmp(arg, "--whitespace-clean"))
587
+ data.clean_white = 1;
588
+ else if (!strcmp(arg, "--whitespace-clean-no-newline")) {
589
+ data.clean_white = 1;
590
+ data.whitespace_clean_flags = 1;
591
+ } else if (!strcmp(arg, "-W") || !strcmp(arg, "--no-trim"))
592
+ data.no_trim_whitespace = 1;
593
+ else if (!strcmp(arg, "--sample-every"))
594
+ ARG_require_val(data.sample_every_n, atoi);
595
+ else if (!strcmp(arg, "--sample-pct"))
596
+ ARG_require_val(data.sample_pct, atof);
597
+ else if (!strcmp(arg, "--prepend-header")) {
598
+ int err = 0;
599
+ data.prepend_header = zsv_next_arg(++arg_i, argc, argv, &err);
600
+ if (err)
601
+ stat = zsv_status_error;
602
+ } else if (!strcmp(arg, "--no-header"))
603
+ data.no_header = 1;
604
+ else if (!strcmp(arg, "-H") || !strcmp(arg, "--head")) {
605
+ int val;
606
+ ARG_require_val(val, atoi);
607
+ data.data_rows_limit = val + 1;
608
+ } else if (!strcmp(arg, "-D") || !strcmp(arg, "--skip-data"))
609
+ ARG_require_val(data.skip_data_rows, atoi);
610
+ #ifndef ZSV_NO_PARALLEL
611
+ else if (!strcmp(arg, "-j") || !strcmp(arg, "--jobs"))
612
+ ARG_require_val(data.num_chunks, atoi);
613
+ else if (!strcmp(arg, "--parallel")) {
614
+ data.num_chunks = zsv_get_number_of_cores();
615
+ if (data.num_chunks < 2) {
616
+ fprintf(stderr, "Warning: --parallel specified but only one core found; using -j 4 instead");
617
+ data.num_chunks = 4;
618
+ }
619
+ }
620
+ #endif
621
+ else if (!strcmp(arg, "-e")) {
622
+ const char *v;
623
+ ARG_require_val(v, (const char *));
624
+ data.embedded_lineend = *v;
625
+ } else if (!strcmp(arg, "-x")) {
626
+ const char *v;
627
+ ARG_require_val(v, (const char *));
628
+ zsv_select_add_exclusion(&data, v);
629
+ } else if (*arg == '-')
630
+ stat = zsv_printerr(1, "Unrecognized argument: %s", arg);
631
+ else if (data.input_path)
632
+ stat = zsv_printerr(1, "Input specified twice");
633
+ else
634
+ data.input_path = arg;
635
+ }
636
+
637
+ if (stat != zsv_status_ok)
638
+ goto zsv_select_main_done;
639
+
640
+ // configuration & setup
641
+ if (!writer_opts.stream)
642
+ writer_opts.stream = stdout;
643
+ if (data.sample_pct)
644
+ srand(time(0));
645
+ if (data.use_header_indexes && (stat = zsv_select_check_exclusions_are_indexes(&data)))
646
+ goto zsv_select_main_done;
647
+
648
+ #ifndef ZSV_NO_PARALLEL
649
+ if (data.num_chunks > 1) {
650
+ enum zsv_chunk_status chstat = zsv_chunkable(data.input_path, data.opts);
651
+ if (chstat != zsv_chunk_status_ok) {
652
+ stat = zsv_printerr(1, "%s", zsv_chunk_status_str(chstat));
653
+ goto zsv_select_main_done;
654
+ }
655
+ }
656
+ #endif
657
+
658
+ // input stream
659
+ if (data.input_path) {
660
+ if (!(data.opts->stream = fopen(data.input_path, "rb")))
661
+ stat = zsv_printerr(1, "Cannot open %s", data.input_path);
662
+ } else {
663
+ #ifdef NO_STDIN
664
+ stat = zsv_printerr(1, "Input file required");
665
+ goto zsv_select_main_done;
666
+ #else
667
+ data.opts->stream = stdin;
668
+ #endif
669
+ }
670
+
671
+ // auto-fixed column detection
672
+ if (data.fixed.autodetect) { // fixed-auto flag
673
+ if (data.fixed.count)
674
+ stat = zsv_printerr(1, "--fixed-auto cannot be used with --fixed");
675
+ else {
676
+ size_t bsz = 1024 * 256;
677
+ if (!(preview_buff = calloc(bsz, 1)))
678
+ stat = zsv_status_memory;
679
+ else
680
+ stat =
681
+ auto_detect_fixed_column_sizes(&data.fixed, data.opts, preview_buff, bsz, &preview_buff_len, opts->verbose);
682
+ }
683
+ }
684
+ if (stat != zsv_status_ok)
685
+ goto zsv_select_main_done;
686
+
687
+ // parser initialization
688
+ if (col_index_arg_i) {
689
+ data.col_argv = &argv[col_index_arg_i];
690
+ data.col_argc = argc - col_index_arg_i;
691
+ }
692
+
693
+ data.header_names = calloc(data.opts->max_columns, sizeof(*data.header_names));
694
+ data.out2in = calloc(data.opts->max_columns, sizeof(*data.out2in));
695
+ data.csv_writer = zsv_writer_new(&writer_opts);
696
+
697
+ if (!data.header_names || !data.out2in || !data.csv_writer) {
698
+ stat = zsv_status_memory;
699
+ goto zsv_select_main_done;
700
+ }
701
+
702
+ // execution
703
+ data.opts->row_handler = zsv_select_header_row;
704
+ data.opts->ctx = &data;
705
+
706
+ if (zsv_new_with_properties(data.opts, custom_prop_handler, data.input_path, &data.parser) == zsv_status_ok) {
707
+ data.any_clean = !data.no_trim_whitespace || data.clean_white || data.embedded_lineend || data.unescape;
708
+
709
+ // apply fixed offsets (whether from --fixed arg or --fixed-auto detection)
710
+ if (data.fixed.count && zsv_set_fixed_offsets(data.parser, data.fixed.count, data.fixed.offsets) != zsv_status_ok)
711
+ data.cancelled = 1;
712
+
713
+ unsigned char writer_buff[512];
714
+ zsv_writer_set_temp_buff(data.csv_writer, writer_buff, sizeof(writer_buff));
715
+
716
+ zsv_handle_ctrl_c_signal();
717
+
718
+ enum zsv_status p_stat = zsv_status_ok;
719
+ if (preview_buff_len)
720
+ p_stat = zsv_parse_bytes(data.parser, preview_buff, preview_buff_len);
721
+
722
+ while (p_stat == zsv_status_ok && !zsv_signal_interrupted && !data.cancelled)
723
+ p_stat = zsv_parse_more(data.parser);
724
+
725
+ if (p_stat == zsv_status_no_more_input) {
726
+ zsv_finish(data.parser);
727
+ #ifndef ZSV_NO_PARALLEL
728
+ // unlikely, but maybe conceivable if chunk split was not accurate and
729
+ // a correctly-split chunk's last row entirely ate the next incorrectly-split chunk
730
+ if (data.run_in_parallel && !data.next_row_start)
731
+ data.next_row_start = zsv_cum_scanned_length(data.parser) + 1;
732
+ #endif
733
+ }
734
+ zsv_delete(data.parser);
735
+
736
+ #ifndef ZSV_NO_PARALLEL
737
+ if (data.run_in_parallel) {
738
+ // explicitly flush and delete main writer before merge which uses raw fd
739
+ zsv_writer_delete(data.csv_writer);
740
+ data.csv_writer = NULL;
741
+ if (zsv_merge_worker_outputs(&data, writer_opts.stream) != 0)
742
+ stat = zsv_status_error;
743
+ }
744
+ #endif
745
+ }
746
+
747
+ zsv_select_main_done:
748
+ free(preview_buff);
749
+ zsv_select_cleanup(&data);
750
+ if (writer_opts.stream && writer_opts.stream != stdout)
751
+ fclose(writer_opts.stream);
752
+ return stat;
753
+ }