zsv 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +36 -0
  3. data/LICENSE +21 -0
  4. data/README.md +311 -0
  5. data/ext/zsv/common.h +34 -0
  6. data/ext/zsv/extconf.rb +137 -0
  7. data/ext/zsv/options.c +126 -0
  8. data/ext/zsv/options.h +31 -0
  9. data/ext/zsv/options_internal.h +8 -0
  10. data/ext/zsv/parser.c +300 -0
  11. data/ext/zsv/parser.h +62 -0
  12. data/ext/zsv/row.c +122 -0
  13. data/ext/zsv/row.h +39 -0
  14. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
  15. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
  16. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
  17. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
  18. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
  19. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
  20. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
  21. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
  22. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
  23. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
  24. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
  25. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
  26. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
  27. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
  28. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
  29. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
  30. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
  31. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
  32. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
  33. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
  34. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
  35. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
  36. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
  37. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
  38. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
  39. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
  40. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
  41. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
  42. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
  43. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
  84. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
  85. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
  86. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
  87. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
  88. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
  89. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
  90. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
  91. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
  92. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
  93. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
  94. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
  95. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
  96. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
  97. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
  98. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
  99. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
  100. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
  106. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
  107. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
  108. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
  109. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
  110. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
  111. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
  112. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
  113. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
  141. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
  142. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
  143. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
  144. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
  145. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
  146. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
  147. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
  148. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
  182. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
  183. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
  184. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
  185. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
  186. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
  187. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
  188. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
  189. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
  190. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
  191. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
  192. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
  193. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
  194. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
  195. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
  196. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
  197. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
  224. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
  225. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
  226. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
  227. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
  228. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
  229. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
  230. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
  231. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
  232. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
  233. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
  234. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
  235. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
  236. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
  237. data/ext/zsv/zsv_ext.c +343 -0
  238. data/lib/zsv/version.rb +5 -0
  239. data/lib/zsv.rb +81 -0
  240. metadata +340 -0
@@ -0,0 +1,812 @@
1
+ /*
2
+ * Copyright (C) 2021 Liquidaty and zsv contributors. All rights reserved.
3
+ *
4
+ * This file is part of zsv/lib, distributed under the MIT license as defined at
5
+ * https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ #include <stdio.h>
9
+ #include <assert.h>
10
+ #ifdef _WIN32
11
+ #define _CRT_RAND_S // for random number generator, used when sampling. must come before including stdlib.h
12
+ #endif
13
+ #include <stdlib.h>
14
+ #include <stdint.h>
15
+ #include <string.h>
16
+ #include <ctype.h>
17
+ #include <time.h>
18
+ #include <stdarg.h>
19
+
20
+ #define ZSV_COMMAND select_pull
21
+ #include "zsv_command.h"
22
+
23
+ #include <zsv/utils/writer.h>
24
+ #include <zsv/utils/utf8.h>
25
+ #include <zsv/utils/string.h>
26
+ #include <zsv/utils/mem.h>
27
+ #include <zsv/utils/arg.h>
28
+
29
// One node of a singly linked list of search strings
struct zsv_select_search_str {
  struct zsv_select_search_str *next; // following node, or NULL at the end of the list
  const char *value;                  // text to search for; the list only borrows this pointer
  size_t len;                         // length of value in bytes (0 when value is NULL)
};
34
+
35
+ static void zsv_select_search_str_delete(struct zsv_select_search_str *ss) {
36
+ for (struct zsv_select_search_str *next; ss; ss = next) {
37
+ next = ss->next;
38
+ free(ss);
39
+ }
40
+ }
41
+
42
// One node of a singly linked list of unsigned integers
struct zsv_select_uint_list {
  struct zsv_select_uint_list *next; // following node, or NULL at the end of the list
  unsigned int value;                // payload
};
46
+
47
+ struct zsv_select_data {
48
+ FILE *in;
49
+ unsigned int current_column_ix;
50
+ size_t data_row_count;
51
+
52
+ struct zsv_opts *opts;
53
+ unsigned int errcount;
54
+
55
+ unsigned int output_col_index; // num of cols printed in current row
56
+
57
+ // output columns:
58
+ const char **col_argv;
59
+ int col_argc;
60
+ char *cols_to_print; // better: bitfield
61
+
62
+ struct {
63
+ unsigned int ix; // index of the input column to be output
64
+ struct { // merge data: only used with --merge
65
+ struct zsv_select_uint_list *indexes, **last_index;
66
+ } merge;
67
+ } *out2in; // array of .output_cols_count length; out2in[x] = y where x = output ix, y = input info
68
+
69
+ unsigned int output_cols_count; // total count of output columns
70
+
71
+ #define MAX_EXCLUSIONS 1024
72
+ const unsigned char *exclusions[MAX_EXCLUSIONS];
73
+ unsigned int exclusion_count;
74
+
75
+ unsigned int header_name_count;
76
+ unsigned char **header_names;
77
+
78
+ const char *prepend_header; // --prepend-header
79
+
80
+ char header_finished;
81
+
82
+ char embedded_lineend;
83
+
84
+ double sample_pct;
85
+
86
+ unsigned char sample_every_n;
87
+
88
+ size_t data_rows_limit;
89
+ size_t skip_data_rows;
90
+
91
+ struct zsv_select_search_str *search_strings;
92
+
93
+ zsv_csv_writer csv_writer;
94
+
95
+ size_t overflow_size;
96
+
97
+ /*
98
+ struct {
99
+ size_t *offsets;
100
+ size_t count;
101
+ } fixed;
102
+ */
103
+ unsigned char whitespace_clean_flags;
104
+
105
+ unsigned char print_all_cols : 1;
106
+ unsigned char use_header_indexes : 1;
107
+ unsigned char no_trim_whitespace : 1;
108
+ unsigned char cancelled : 1;
109
+ unsigned char skip_this_row : 1;
110
+ unsigned char verbose : 1;
111
+ unsigned char clean_white : 1;
112
+ unsigned char prepend_line_number : 1;
113
+
114
+ unsigned char any_clean : 1;
115
+ #define ZSV_SELECT_DISTINCT_MERGE 2
116
+ unsigned char distinct : 2; // 1 = ignore subsequent cols, ZSV_SELECT_DISTINCT_MERGE = merge subsequent cols (first
117
+ // non-null value)
118
+
119
+ unsigned char no_header : 1;
120
+ unsigned char _ : 4;
121
+ };
122
+
123
// The shapes a column-index argument can take
enum zsv_select_column_index_selection_type {
  zsv_select_column_index_selection_type_none = 0,     // not a valid index expression
  zsv_select_column_index_selection_type_single,       // "N"
  zsv_select_column_index_selection_type_range,        // "N-M"
  zsv_select_column_index_selection_type_lower_bounded // "N-"
};

// Forward declaration: used by the exclusion check before its definition
static enum zsv_select_column_index_selection_type
zsv_select_column_index_selection(const unsigned char *arg, unsigned *lo, unsigned *hi);
132
+
133
+ static inline void zsv_select_add_exclusion(struct zsv_select_data *data, const char *name) {
134
+ if (data->exclusion_count < MAX_EXCLUSIONS)
135
+ data->exclusions[data->exclusion_count++] = (const unsigned char *)name;
136
+ }
137
+
138
+ static inline unsigned char *zsv_select_get_header_name(struct zsv_select_data *data, unsigned in_ix) {
139
+ if (in_ix < data->header_name_count)
140
+ return data->header_names[in_ix];
141
+ return NULL;
142
+ }
143
+
144
+ static inline char zsv_select_excluded_current_header_name(struct zsv_select_data *data, unsigned in_ix) {
145
+ if (data->exclusion_count) {
146
+ unsigned char *header_name = zsv_select_get_header_name(data, in_ix);
147
+ if (data->use_header_indexes) {
148
+ for (unsigned int ix = 0; ix < data->exclusion_count; ix++) {
149
+ unsigned i, j;
150
+ switch (zsv_select_column_index_selection(data->exclusions[ix], &i, &j)) {
151
+ case zsv_select_column_index_selection_type_none:
152
+ // not expected!
153
+ break;
154
+ case zsv_select_column_index_selection_type_single:
155
+ if (in_ix + 1 == i)
156
+ return 1;
157
+ break;
158
+ case zsv_select_column_index_selection_type_range:
159
+ if (i <= in_ix + 1 && in_ix + 1 <= j)
160
+ return 1;
161
+ break;
162
+ case zsv_select_column_index_selection_type_lower_bounded:
163
+ if (i <= in_ix + 1)
164
+ return 1;
165
+ break;
166
+ }
167
+ }
168
+ } else {
169
+ if (header_name) {
170
+ for (unsigned int i = 0; i < data->exclusion_count; i++)
171
+ if (!zsv_stricmp(header_name, data->exclusions[i]))
172
+ return 1;
173
+ }
174
+ }
175
+ }
176
+ return 0;
177
+ }
178
+
179
+ // zsv_select_find_header(): return 1-based index, or 0 if not found
180
+ static int zsv_select_find_header(struct zsv_select_data *data, const unsigned char *header_name) {
181
+ if (header_name) {
182
+ for (unsigned int i = 0; i < data->output_cols_count; i++) {
183
+ unsigned char *prior_header_name = zsv_select_get_header_name(data, data->out2in[i].ix);
184
+ if (prior_header_name && !zsv_stricmp(header_name, prior_header_name))
185
+ return i + 1;
186
+ }
187
+ }
188
+ return 0;
189
+ }
190
+
191
+ static int zsv_select_add_output_col(struct zsv_select_data *data, unsigned in_ix) {
192
+ int err = 0;
193
+ if (data->output_cols_count < data->opts->max_columns) {
194
+ int found = zsv_select_find_header(data, zsv_select_get_header_name(data, in_ix));
195
+ if (data->distinct && found) {
196
+ if (data->distinct == ZSV_SELECT_DISTINCT_MERGE) {
197
+ // add this index
198
+ struct zsv_select_uint_list *ix = calloc(1, sizeof(*ix));
199
+ if (!ix)
200
+ err = zsv_printerr(1, "Out of memory!\n");
201
+ else {
202
+ ix->value = in_ix;
203
+ if (!data->out2in[found - 1].merge.indexes)
204
+ data->out2in[found - 1].merge.indexes = ix;
205
+ else
206
+ *data->out2in[found - 1].merge.last_index = ix;
207
+ data->out2in[found - 1].merge.last_index = &ix->next;
208
+ }
209
+ }
210
+ return err;
211
+ }
212
+ if (zsv_select_excluded_current_header_name(data, in_ix))
213
+ return err;
214
+ data->out2in[data->output_cols_count++].ix = in_ix;
215
+ }
216
+ return err;
217
+ }
218
+
219
// Linear search for needle in haystack using zsv_stricmp(); returns the
// 1-based position of the first match, or 0 if none. A NULL or empty needle
// matches the first NULL or empty entry.
// not very fast, but we don't need it to be
static inline unsigned int str_array_ifind(const unsigned char *needle, unsigned char *haystack[], unsigned hay_count) {
  const int needle_blank = !needle || !*needle;
  for (unsigned int pos = 0; pos < hay_count; pos++) {
    const unsigned char *hay = haystack[pos];
    const int hay_blank = !hay || !*hay;
    if (needle_blank != hay_blank)
      continue; // a blank value never matches a non-blank one
    if (needle_blank || !zsv_stricmp(needle, hay))
      return pos + 1;
  }
  return 0;
}
231
+
232
+ static int zsv_select_set_output_columns(struct zsv_select_data *data) {
233
+ int err = 0;
234
+ unsigned int header_name_count = data->header_name_count;
235
+ if (!data->col_argc) {
236
+ for (unsigned int i = 0; !err && i < header_name_count; i++)
237
+ err = zsv_select_add_output_col(data, i);
238
+ } else if (data->use_header_indexes) {
239
+ for (int arg_i = 0; !err && arg_i < data->col_argc; arg_i++) {
240
+ const char *arg = data->col_argv[arg_i];
241
+ unsigned i, j;
242
+ switch (zsv_select_column_index_selection((const unsigned char *)arg, &i, &j)) {
243
+ case zsv_select_column_index_selection_type_none:
244
+ zsv_printerr(1, "Invalid column index: %s", arg);
245
+ err = -1;
246
+ break;
247
+ case zsv_select_column_index_selection_type_single:
248
+ err = zsv_select_add_output_col(data, i - 1);
249
+ break;
250
+ case zsv_select_column_index_selection_type_range:
251
+ while (i <= j && i < data->opts->max_columns) {
252
+ err = zsv_select_add_output_col(data, i - 1);
253
+ i++;
254
+ }
255
+ break;
256
+ case zsv_select_column_index_selection_type_lower_bounded:
257
+ if (i) {
258
+ for (unsigned int k = i - 1; !err && k < header_name_count; k++)
259
+ err = zsv_select_add_output_col(data, k);
260
+ }
261
+ break;
262
+ }
263
+ }
264
+ } else { // using header names
265
+ for (int arg_i = 0; !err && arg_i < data->col_argc; arg_i++) {
266
+ // find the location of the matching header name, if any
267
+ unsigned int in_pos =
268
+ str_array_ifind((const unsigned char *)data->col_argv[arg_i], data->header_names, header_name_count);
269
+ if (!in_pos) {
270
+ fprintf(stderr, "Column %s not found\n", data->col_argv[arg_i]);
271
+ err = -1;
272
+ } else
273
+ err = zsv_select_add_output_col(data, in_pos - 1);
274
+ }
275
+ }
276
+ return err;
277
+ }
278
+
279
+ static void zsv_select_add_search(struct zsv_select_data *data, const char *value) {
280
+ struct zsv_select_search_str *ss = calloc(1, sizeof(*ss));
281
+ ss->value = value;
282
+ ss->len = value ? strlen(value) : 0;
283
+ ss->next = data->search_strings;
284
+ data->search_strings = ss;
285
+ }
286
+
287
+ #ifndef NDEBUG
288
+ __attribute__((always_inline)) static inline
289
+ #endif
290
+ unsigned char *
291
+ zsv_select_cell_clean(struct zsv_select_data *data, unsigned char *utf8_value, char quoted, size_t *lenp) {
292
+ size_t len = *lenp;
293
+ // to do: option to replace or warn non-printable chars 0 - 31:
294
+ // vectorized scan
295
+ // replace or warn if found
296
+
297
+ if (UNLIKELY(!data->no_trim_whitespace))
298
+ utf8_value = (unsigned char *)zsv_strtrim(utf8_value, &len);
299
+
300
+ if (UNLIKELY(data->clean_white))
301
+ len = zsv_strwhite(utf8_value, len, data->whitespace_clean_flags); // to do: zsv_clean
302
+
303
+ if (UNLIKELY(data->embedded_lineend && quoted)) {
304
+ unsigned char *tmp;
305
+ const char *to_replace[] = {"\r\n", "\r", "\n"};
306
+ for (int i = 0; i < 3; i++) {
307
+ while ((tmp = memmem(utf8_value, len, to_replace[i], strlen(to_replace[i])))) {
308
+ if (strlen(to_replace[i]) == 1)
309
+ *tmp = data->embedded_lineend;
310
+ else {
311
+ size_t right_len = utf8_value + len - tmp;
312
+ memmove(tmp + 1, tmp + 2, right_len - 2);
313
+ *tmp = data->embedded_lineend;
314
+ len--;
315
+ }
316
+ }
317
+ }
318
+ if (data->no_trim_whitespace)
319
+ utf8_value = (unsigned char *)zsv_strtrim(utf8_value, &len);
320
+ }
321
+ *lenp = len;
322
+ return utf8_value;
323
+ }
324
+
325
+ static inline char zsv_select_row_search_hit(struct zsv_select_data *data, zsv_parser p) {
326
+ if (!data->search_strings)
327
+ return 1;
328
+
329
+ unsigned int j = zsv_cell_count(p);
330
+ for (unsigned int i = 0; i < j; i++) {
331
+ struct zsv_cell cell = zsv_get_cell(p, i);
332
+ if (UNLIKELY(data->any_clean != 0))
333
+ cell.str = zsv_select_cell_clean(data, cell.str, cell.quoted, &cell.len);
334
+ if (cell.len) {
335
+ for (struct zsv_select_search_str *ss = data->search_strings; ss; ss = ss->next)
336
+ if (ss->value && *ss->value && memmem(cell.str, cell.len, ss->value, ss->len))
337
+ return 1;
338
+ }
339
+ }
340
+ return 0;
341
+ }
342
+
343
+ static enum zsv_select_column_index_selection_type zsv_select_column_index_selection(const unsigned char *arg,
344
+ unsigned *lo, unsigned *hi) {
345
+ enum zsv_select_column_index_selection_type result = zsv_select_column_index_selection_type_none;
346
+
347
+ unsigned int i = 0;
348
+ unsigned int j = 0;
349
+ int n = 0;
350
+ int k = sscanf((const char *)arg, "%u-%u%n", &i, &j, &n);
351
+ if (k == 2) {
352
+ if (n >= 0 && (size_t)n == strlen((const char *)arg) && i > 0 && j >= i)
353
+ result = zsv_select_column_index_selection_type_range;
354
+ } else {
355
+ k = sscanf((const char *)arg, "%u%n", &i, &n);
356
+ if (k == 1 && n >= 0 && (size_t)n == strlen((const char *)arg)) {
357
+ if (i > 0)
358
+ result = zsv_select_column_index_selection_type_single;
359
+ } else {
360
+ k = sscanf((const char *)arg, "%u-%n", &i, &n);
361
+ if (k == 1 && n >= 0 && (size_t)n == strlen((const char *)arg)) {
362
+ if (i > 0) {
363
+ result = zsv_select_column_index_selection_type_lower_bounded;
364
+ }
365
+ }
366
+ }
367
+ }
368
+ if (lo)
369
+ *lo = i;
370
+ if (hi)
371
+ *hi = j;
372
+ return result;
373
+ }
374
+
375
+ // zsv_select_check_exclusions_are_indexes(): return err
376
+ static int zsv_select_check_exclusions_are_indexes(struct zsv_select_data *data) {
377
+ int err = 0;
378
+ for (unsigned int e = 0; e < data->exclusion_count; e++) {
379
+ const unsigned char *arg = data->exclusions[e];
380
+ if (zsv_select_column_index_selection(arg, NULL, NULL) == zsv_select_column_index_selection_type_none)
381
+ err = zsv_printerr(1, "Invalid column index: %s", arg);
382
+ }
383
+ return err;
384
+ }
385
+
386
// demo_random_bw_1_and_100(): this is a poor random number generator. you probably
// will want to use a better one.
// Returns a value in [0, 100). Which generator is used depends on build flags:
// arc4random_uniform() if HAVE_ARC4RANDOM_UNIFORM, else rand_s() if HAVE_RAND_S,
// else rand().
static double demo_random_bw_1_and_100(void) {
#ifdef HAVE_ARC4RANDOM_UNIFORM
  return (long double)(arc4random_uniform(1000000)) / 10000;
#else
  double max = 100.0;
#ifdef HAVE_RAND_S
  unsigned int n = 0; // defined result even if every rand_s() attempt fails
  unsigned int tries = 0;
  while (rand_s(&n) && tries++ < 10)
    ;
  return (double)n / ((double)UINT_MAX + 1) * max;
#else
  // rand() yields 0..RAND_MAX, so scale by RAND_MAX + 1 to cover the whole range
  return (double)rand() / ((double)RAND_MAX + 1) * max;
#endif
#endif
}
406
+
407
+ // zsv_select_output_row(): output row data
408
+ static void zsv_select_output_data_row(struct zsv_select_data *data, zsv_parser p) {
409
+ unsigned int cnt = data->output_cols_count;
410
+ char first = 1;
411
+ if (data->prepend_line_number) {
412
+ zsv_writer_cell_zu(data->csv_writer, first, data->data_row_count);
413
+ first = 0;
414
+ }
415
+
416
+ /* print data row */
417
+ for (unsigned int i = 0; i < cnt; i++) { // for each output column
418
+ unsigned int in_ix = data->out2in[i].ix;
419
+ struct zsv_cell cell = zsv_get_cell(p, in_ix);
420
+ if (UNLIKELY(data->any_clean != 0))
421
+ cell.str = zsv_select_cell_clean(data, cell.str, cell.quoted, &cell.len);
422
+ if (VERY_UNLIKELY(data->distinct == ZSV_SELECT_DISTINCT_MERGE)) {
423
+ if (UNLIKELY(cell.len == 0)) {
424
+ for (struct zsv_select_uint_list *ix = data->out2in[i].merge.indexes; ix; ix = ix->next) {
425
+ unsigned int m_ix = ix->value;
426
+ cell = zsv_get_cell(p, m_ix);
427
+ if (cell.len) {
428
+ if (UNLIKELY(data->any_clean != 0))
429
+ cell.str = zsv_select_cell_clean(data, cell.str, cell.quoted, &cell.len);
430
+ if (cell.len)
431
+ break;
432
+ }
433
+ }
434
+ }
435
+ }
436
+ zsv_writer_cell(data->csv_writer, first, cell.str, cell.len, cell.quoted);
437
+ first = 0;
438
+ }
439
+ }
440
+
441
+ static void zsv_select_data_row(struct zsv_select_data *data, zsv_parser p) {
442
+ data->data_row_count++;
443
+
444
+ if (UNLIKELY(zsv_cell_count(p) == 0 || data->cancelled))
445
+ return;
446
+
447
+ // check if we should skip this row
448
+ data->skip_this_row = 0;
449
+ if (UNLIKELY(data->skip_data_rows)) {
450
+ data->skip_data_rows--;
451
+ data->skip_this_row = 1;
452
+ } else if (UNLIKELY(data->sample_every_n || data->sample_pct)) {
453
+ data->skip_this_row = 1;
454
+ if (data->sample_every_n && data->data_row_count % data->sample_every_n == 1)
455
+ data->skip_this_row = 0;
456
+ if (data->sample_pct && demo_random_bw_1_and_100() <= data->sample_pct)
457
+ data->skip_this_row = 0;
458
+ }
459
+
460
+ if (LIKELY(!data->skip_this_row)) {
461
+ // if we have a search filter, check that
462
+ char skip = 0;
463
+ skip = !zsv_select_row_search_hit(data, p);
464
+ if (!skip) {
465
+
466
+ // print the data row
467
+ zsv_select_output_data_row(data, p);
468
+ if (UNLIKELY(data->data_rows_limit > 0))
469
+ if (data->data_row_count + 1 >= data->data_rows_limit)
470
+ data->cancelled = 1;
471
+ }
472
+ }
473
+ if (data->data_row_count % 25000 == 0 && data->verbose)
474
+ fprintf(stderr, "Processed %zu rows\n", data->data_row_count);
475
+ }
476
+
477
+ static void zsv_select_print_header_row(struct zsv_select_data *data) {
478
+ if (data->no_header)
479
+ return;
480
+ zsv_writer_cell_prepend(data->csv_writer, (const unsigned char *)data->prepend_header);
481
+ if (data->prepend_line_number)
482
+ zsv_writer_cell_s(data->csv_writer, 1, (const unsigned char *)"#", 0);
483
+ for (unsigned int i = 0; i < data->output_cols_count; i++) {
484
+ unsigned char *header_name = zsv_select_get_header_name(data, data->out2in[i].ix);
485
+ zsv_writer_cell_s(data->csv_writer, i == 0 && !data->prepend_line_number, header_name, 1);
486
+ }
487
+ zsv_writer_cell_prepend(data->csv_writer, NULL);
488
+ }
489
+
490
+ static void zsv_select_header_finish(struct zsv_select_data *data) {
491
+ if (zsv_select_set_output_columns(data))
492
+ data->cancelled = 1;
493
+ else
494
+ zsv_select_print_header_row(data);
495
+ }
496
+
497
+ static void zsv_select_header_row(struct zsv_select_data *data, zsv_parser p) {
498
+ if (data->cancelled)
499
+ return;
500
+
501
+ unsigned int cols = zsv_cell_count(p);
502
+ unsigned int max_header_ix = 0;
503
+ for (unsigned int i = 0; i < cols; i++) {
504
+ struct zsv_cell cell = zsv_get_cell(p, i);
505
+ if (UNLIKELY(data->any_clean != 0))
506
+ cell.str = zsv_select_cell_clean(data, cell.str, cell.quoted, &cell.len);
507
+ if (i < data->opts->max_columns) {
508
+ data->header_names[i] = zsv_memdup(cell.str, cell.len);
509
+ max_header_ix = i + 1;
510
+ }
511
+ }
512
+
513
+ // in case we want to make this an option later
514
+ char trim_trailing_columns = 1;
515
+ if (!trim_trailing_columns)
516
+ max_header_ix = cols;
517
+
518
+ if (max_header_ix > data->header_name_count)
519
+ data->header_name_count = max_header_ix;
520
+
521
+ zsv_select_header_finish(data);
522
+ }
523
+
524
+ #define ZSV_SELECT_MAX_COLS_DEFAULT 1024
525
+ #define ZSV_SELECT_MAX_COLS_DEFAULT_S "1024"
526
+
527
+ const char *zsv_select_usage_msg[] = {
528
+ APPNAME ": extracts and outputs specified columns",
529
+ "",
530
+ "Usage: " APPNAME " [filename] [options] [-- col_specifier [... col_specifier]]",
531
+ " where col_specifier is a column name or, if the -n option is used,",
532
+ " a column index (starting at 1) or index range in the form of n-m",
533
+ " e.g. " APPNAME " -n file.csv -- 1 4-6 50 10",
534
+ " " APPNAME " file.csv -- first_col fiftieth_column \"Tenth Column\"",
535
+ "",
536
+ "Note: Outputs the columns specified after '--' separator, or all columns if omitted.",
537
+ "",
538
+ "Options:",
539
+ " -b,--with-bom : output with BOM",
540
+ // " --fixed <offset1,offset2,offset3>: parse as fixed-width text; use given comma-separated list of positive integers
541
+ // for cell end indexes",
542
+ #ifndef ZSV_CLI
543
+ " -v, --verbose : verbose output",
544
+ #endif
545
+ " -H,--head <n> : (head) only process the first n rows of input data (including header)",
546
+ " --no-header : do not output header row",
547
+ " --prepend-header <value> : prepend each column header with the given text <value>",
548
+ " -s, --search <value> : only output rows with at least one cell containing <value>",
549
+ // TO DO: " -s, --search /<pattern>/modifiers: search on regex pattern; modifiers include 'g' (global) and 'i'
550
+ // (case-insensitive)",
551
+ " --sample-every <num_of_rows> : output a sample consisting of the first row, then every nth row",
552
+ " --sample-pct <percentage> : output a randomly-selected sample (32 bits of randomness) of n%% of input rows",
553
+ " -d,--header-row-span <n> : apply header depth (rowspan) of n",
554
+ " --distinct : skip subsequent occurrences of columns with the same name",
555
+ " --merge : merge subsequent occurrences of columns with the same name",
556
+ " outputting first non-null value",
557
+ // --rename: like distinct, but instead of removing cols with dupe names, renames them, trying _<n> for n up to max
558
+ // cols
559
+ " -e <embedded_lineend_char> : char to replace embedded lineend. If left empty, embedded lineends are preserved.",
560
+ " If the provided string begins with 0x, it will be interpreted as the hex",
561
+ " representation of a string.",
562
+ " -x <column> : exclude the indicated column. can be specified more than once",
563
+ " -N,--line-number : prefix each row with the row number",
564
+ " -n : provided column indexes are numbers corresponding to column positions",
565
+ " (starting with 1), instead of names",
566
+ #ifndef ZSV_CLI
567
+ " -T : input is tab-delimited, instead of comma-delimited",
568
+ " -O,--other-delim <delim> : input is delimited with the given char",
569
+ " Note: This option does not support quoted values with embedded delimiters.",
570
+ #endif
571
+ " -w,--whitespace-clean : normalize all whitespace to space or newline, single-char (non-consecutive)",
572
+ " occurrences",
573
+ " --whitespace-clean-no-newline: clean whitespace and remove embedded newlines",
574
+ " -W,--no-trim : do not trim whitespace",
575
+ #ifndef ZSV_CLI
576
+ " -C <max_num_of_columns> : defaults to " ZSV_SELECT_MAX_COLS_DEFAULT_S,
577
+ " -L,--max-row-size <n> : set the maximum memory used for a single row",
578
+ " Default: " ZSV_ROW_MAX_SIZE_MIN_S " (min), " ZSV_ROW_MAX_SIZE_DEFAULT_S " (max)",
579
+ #endif
580
+ " -o <filename> : filename to save output to",
581
+ NULL,
582
+ };
583
+
584
+ static void zsv_select_usage(void) {
585
+ for (size_t i = 0; zsv_select_usage_msg[i]; i++)
586
+ fprintf(stdout, "%s\n", zsv_select_usage_msg[i]);
587
+ }
588
+
589
+ static void zsv_select_cleanup(struct zsv_select_data *data) {
590
+ if (data->opts->stream && data->opts->stream != stdin)
591
+ fclose(data->opts->stream);
592
+
593
+ zsv_writer_delete(data->csv_writer);
594
+ zsv_select_search_str_delete(data->search_strings);
595
+
596
+ if (data->distinct == ZSV_SELECT_DISTINCT_MERGE) {
597
+ for (unsigned int i = 0; i < data->output_cols_count; i++) {
598
+ for (struct zsv_select_uint_list *next, *ix = data->out2in[i].merge.indexes; ix; ix = next) {
599
+ next = ix->next;
600
+ free(ix);
601
+ }
602
+ }
603
+ }
604
+ free(data->out2in);
605
+
606
+ for (unsigned int i = 0; i < data->header_name_count; i++)
607
+ free(data->header_names[i]);
608
+ free(data->header_names);
609
+
610
+ // free(data->fixed.offsets);
611
+ }
612
+
613
+ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts,
614
+ struct zsv_prop_handler *custom_prop_handler) {
615
+ if (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
616
+ zsv_select_usage();
617
+ return zsv_status_ok;
618
+ }
619
+
620
+ int err = 0;
621
+ struct zsv_select_data data = {0};
622
+ data.opts = opts;
623
+ const char *input_path = NULL;
624
+ struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
625
+ int col_index_arg_i = 0;
626
+ enum zsv_status stat = zsv_status_ok;
627
+ for (int arg_i = 1; stat == zsv_status_ok && arg_i < argc; arg_i++) {
628
+ if (!strcmp(argv[arg_i], "--")) {
629
+ col_index_arg_i = arg_i + 1;
630
+ break;
631
+ }
632
+ if (!strcmp(argv[arg_i], "-b") || !strcmp(argv[arg_i], "--with-bom"))
633
+ writer_opts.with_bom = 1;
634
+ /*
635
+ else if(!strcmp(argv[arg_i], "--fixed")) {
636
+ if(++arg_i >= argc)
637
+ stat = zsv_printerr(1, "%s option requires parameter", argv[arg_i-1]);
638
+ else { // parse offsets
639
+ data.fixed.count = 1;
640
+ for(const char *s = argv[arg_i]; *s; s++)
641
+ if(*s == ',')
642
+ data.fixed.count++;
643
+ free(data.fixed.offsets);
644
+ data.fixed.offsets = malloc(data.fixed.count * sizeof(*data.fixed.offsets));
645
+ size_t count = 0;
646
+ const char *start = argv[arg_i];
647
+ for(const char *end = argv[arg_i]; ; end++) {
648
+ if(*end == ',' || *end == '\0') {
649
+ if(!sscanf(start, "%zu,", &data.fixed.offsets[count++])) {
650
+ stat = zsv_printerr(1, "Invalid offset: %s.*\n", end - start, start);
651
+ break;
652
+ } else if(*end == '\0')
653
+ break;
654
+ else {
655
+ start = end + 1;
656
+ if(*start == '\0')
657
+ break;
658
+ }
659
+ }
660
+ }
661
+ }
662
+ } */
663
+ else if (!strcmp(argv[arg_i], "--distinct"))
664
+ data.distinct = 1;
665
+ else if (!strcmp(argv[arg_i], "--merge"))
666
+ data.distinct = ZSV_SELECT_DISTINCT_MERGE;
667
+ else if (!strcmp(argv[arg_i], "-o") || !strcmp(argv[arg_i], "--output")) {
668
+ if (++arg_i >= argc)
669
+ stat = zsv_printerr(1, "%s option requires parameter", argv[arg_i - 1]);
670
+ else if (writer_opts.stream && writer_opts.stream != stdout)
671
+ stat = zsv_printerr(1, "Output file specified more than once");
672
+ else if (!(writer_opts.stream = fopen(argv[arg_i], "wb")))
673
+ stat = zsv_printerr(1, "Unable to open for writing: %s", argv[arg_i]);
674
+ else if (data.opts->verbose)
675
+ fprintf(stderr, "Opened %s for write\n", argv[arg_i]);
676
+ } else if (!strcmp(argv[arg_i], "-N") || !strcmp(argv[arg_i], "--line-number")) {
677
+ data.prepend_line_number = 1;
678
+ } else if (!strcmp(argv[arg_i], "-n"))
679
+ data.use_header_indexes = 1;
680
+ else if (!strcmp(argv[arg_i], "-s") || !strcmp(argv[arg_i], "--search")) {
681
+ arg_i++;
682
+ if (arg_i < argc && strlen(argv[arg_i]))
683
+ zsv_select_add_search(&data, argv[arg_i]);
684
+ else
685
+ stat = zsv_printerr(1, "%s option requires a value", argv[arg_i - 1]);
686
+ } else if (!strcmp(argv[arg_i], "-v") || !strcmp(argv[arg_i], "--verbose")) {
687
+ data.verbose = 1;
688
+ } else if (!strcmp(argv[arg_i], "-w") || !strcmp(argv[arg_i], "--whitespace-clean"))
689
+ data.clean_white = 1;
690
+ else if (!strcmp(argv[arg_i], "--whitespace-clean-no-newline")) {
691
+ data.clean_white = 1;
692
+ data.whitespace_clean_flags = 1;
693
+ } else if (!strcmp(argv[arg_i], "-W") || !strcmp(argv[arg_i], "--no-trim")) {
694
+ data.no_trim_whitespace = 1;
695
+ } else if (!strcmp(argv[arg_i], "--sample-every")) {
696
+ arg_i++;
697
+ if (!(arg_i < argc))
698
+ stat = zsv_printerr(1, "--sample-every option requires a value");
699
+ else if (atoi(argv[arg_i]) <= 0)
700
+ stat = zsv_printerr(1, "--sample-every value should be an integer > 0");
701
+ else
702
+ data.sample_every_n = atoi(argv[arg_i]);
703
+ } else if (!strcmp(argv[arg_i], "--sample-pct")) {
704
+ arg_i++;
705
+ double d;
706
+ if (!(arg_i < argc))
707
+ stat = zsv_printerr(1, "--sample-pct option requires a value");
708
+ else if (!(d = atof(argv[arg_i])) && d > 0 && d < 100)
709
+ stat = zsv_printerr(
710
+ -1, "--sample-pct value should be a number between 0 and 100 (e.g. 1.5 for a sample of 1.5%% of the data");
711
+ else
712
+ data.sample_pct = d;
713
+ } else if (!strcmp(argv[arg_i], "--prepend-header"))
714
+ data.prepend_header = zsv_next_arg(++arg_i, argc, argv, &err);
715
+ else if (!strcmp(argv[arg_i], "--no-header"))
716
+ data.no_header = 1;
717
+ else if (!strcmp(argv[arg_i], "-H") || !strcmp(argv[arg_i], "--head")) {
718
+ if (!(arg_i + 1 < argc && atoi(argv[arg_i + 1]) >= 0))
719
+ stat = zsv_printerr(1, "%s option value invalid: should be positive integer; got %s", argv[arg_i],
720
+ arg_i + 1 < argc ? argv[arg_i + 1] : "");
721
+ else
722
+ data.data_rows_limit = atoi(argv[++arg_i]) + 1;
723
+ } else if (!strcmp(argv[arg_i], "-D") || !strcmp(argv[arg_i], "--skip-data")) {
724
+ ++arg_i;
725
+ if (!(arg_i < argc && atoi(argv[arg_i]) >= 0))
726
+ stat = zsv_printerr(1, "%s option value invalid: should be positive integer", argv[arg_i - 1]);
727
+ else
728
+ data.skip_data_rows = atoi(argv[arg_i]);
729
+ } else if (!strcmp(argv[arg_i], "-e")) {
730
+ ++arg_i;
731
+ if (data.embedded_lineend)
732
+ stat = zsv_printerr(1, "-e option specified more than once");
733
+ else if (strlen(argv[arg_i]) != 1)
734
+ stat = zsv_printerr(1, "-e option value must be a single character");
735
+ else if (arg_i < argc)
736
+ data.embedded_lineend = *argv[arg_i];
737
+ else
738
+ stat = zsv_printerr(1, "-e option requires a value");
739
+ } else if (!strcmp(argv[arg_i], "-x")) {
740
+ arg_i++;
741
+ if (!(arg_i < argc))
742
+ stat = zsv_printerr(1, "%s option requires a value", argv[arg_i - 1]);
743
+ else
744
+ zsv_select_add_exclusion(&data, argv[arg_i]);
745
+ } else if (*argv[arg_i] == '-')
746
+ stat = zsv_printerr(1, "Unrecognized argument: %s", argv[arg_i]);
747
+ else if (data.opts->stream)
748
+ stat = zsv_printerr(1, "Input file was specified, cannot also read: %s", argv[arg_i]);
749
+ else if (!(data.opts->stream = fopen(argv[arg_i], "rb")))
750
+ stat = zsv_printerr(1, "Could not open for reading: %s", argv[arg_i]);
751
+ else
752
+ input_path = argv[arg_i];
753
+ }
754
+
755
+ if (data.sample_pct)
756
+ srand(time(0));
757
+
758
+ if (data.use_header_indexes && stat == zsv_status_ok)
759
+ stat = zsv_select_check_exclusions_are_indexes(&data);
760
+
761
+ if (!data.opts->stream) {
762
+ #ifdef NO_STDIN
763
+ stat = zsv_printerr(1, "Please specify an input file");
764
+ #else
765
+ data.opts->stream = stdin;
766
+ #endif
767
+ }
768
+
769
+ if (stat == zsv_status_ok) {
770
+ if (!col_index_arg_i)
771
+ data.col_argc = 0;
772
+ else {
773
+ data.col_argv = &argv[col_index_arg_i];
774
+ data.col_argc = argc - col_index_arg_i;
775
+ }
776
+
777
+ data.header_names = calloc(data.opts->max_columns, sizeof(*data.header_names));
778
+ assert(data.opts->max_columns > 0);
779
+ data.out2in = calloc(data.opts->max_columns, sizeof(*data.out2in));
780
+ data.csv_writer = zsv_writer_new(&writer_opts);
781
+ if (!(data.header_names && data.csv_writer))
782
+ stat = zsv_status_memory;
783
+ else {
784
+ zsv_parser parser;
785
+ if (zsv_new_with_properties(data.opts, custom_prop_handler, input_path, &parser) == zsv_status_ok) {
786
+ // all done with
787
+ data.any_clean = !data.no_trim_whitespace || data.clean_white || data.embedded_lineend;
788
+
789
+ // TO DO: support fixed input
790
+ // if (data.fixed.count && zsv_set_fixed_offsets(parser, data.fixed.count, data.fixed.offsets) != zsv_status_ok)
791
+ // data.cancelled = 1;
792
+
793
+ // create a local csv writer buff quoted values
794
+ unsigned char writer_buff[512];
795
+ zsv_writer_set_temp_buff(data.csv_writer, writer_buff, sizeof(writer_buff));
796
+
797
+ // process the input data
798
+ zsv_handle_ctrl_c_signal();
799
+ enum zsv_status status = zsv_next_row(parser);
800
+ if (status == zsv_status_row)
801
+ zsv_select_header_row(&data, parser);
802
+ while ((status = zsv_next_row(parser)) == zsv_status_row)
803
+ zsv_select_data_row(&data, parser);
804
+ zsv_delete(parser);
805
+ }
806
+ }
807
+ }
808
+ zsv_select_cleanup(&data);
809
+ if (writer_opts.stream && writer_opts.stream != stdout)
810
+ fclose(writer_opts.stream);
811
+ return stat;
812
+ }