zsv 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +36 -0
  3. data/LICENSE +21 -0
  4. data/README.md +311 -0
  5. data/ext/zsv/common.h +34 -0
  6. data/ext/zsv/extconf.rb +137 -0
  7. data/ext/zsv/options.c +126 -0
  8. data/ext/zsv/options.h +31 -0
  9. data/ext/zsv/options_internal.h +8 -0
  10. data/ext/zsv/parser.c +300 -0
  11. data/ext/zsv/parser.h +62 -0
  12. data/ext/zsv/row.c +122 -0
  13. data/ext/zsv/row.h +39 -0
  14. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
  15. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
  16. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
  17. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
  18. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
  19. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
  20. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
  21. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
  22. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
  23. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
  24. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
  25. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
  26. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
  27. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
  28. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
  29. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
  30. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
  31. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
  32. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
  33. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
  34. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
  35. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
  36. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
  37. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
  38. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
  39. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
  40. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
  41. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
  42. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
  43. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
  84. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
  85. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
  86. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
  87. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
  88. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
  89. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
  90. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
  91. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
  92. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
  93. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
  94. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
  95. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
  96. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
  97. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
  98. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
  99. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
  100. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
  106. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
  107. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
  108. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
  109. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
  110. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
  111. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
  112. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
  113. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
  141. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
  142. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
  143. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
  144. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
  145. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
  146. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
  147. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
  148. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
  182. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
  183. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
  184. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
  185. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
  186. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
  187. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
  188. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
  189. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
  190. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
  191. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
  192. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
  193. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
  194. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
  195. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
  196. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
  197. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
  224. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
  225. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
  226. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
  227. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
  228. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
  229. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
  230. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
  231. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
  232. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
  233. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
  234. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
  235. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
  236. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
  237. data/ext/zsv/zsv_ext.c +343 -0
  238. data/lib/zsv/version.rb +5 -0
  239. data/lib/zsv.rb +81 -0
  240. metadata +340 -0
@@ -0,0 +1,731 @@
1
+ /*
2
+ * Copyright (C) 2021 Tai Chi Minh Ralph Eastwood (self), Matt Wong (Guarnerix Inc dba Liquidaty)
3
+ * All rights reserved
4
+ *
5
+ * This file is part of zsv/lib, distributed under the license defined at
6
+ * https://opensource.org/licenses/MIT
7
+ */
8
+
9
+ #include <stdio.h>
10
+ #include <stdlib.h>
11
+ #include <string.h>
12
+ #include <ctype.h>
13
+ #include <stdint.h> // uint32_t
14
+
15
+ #ifdef ZSV_EXTRAS
16
+ #include <time.h>
17
+ #endif
18
+
19
+ #include <zsv/utils/utf8.h>
20
+ #include <zsv/utils/compiler.h>
21
+ #include <zsv/utils/string.h>
22
+
23
/*
 * SIMD configuration. Exactly one of three setups is selected at compile time:
 *
 *   - no AVX2 (-mavx2 absent): 16-byte vectors, 16-bit mask, ZSV_NO_AVX defined.
 *     If plain AVX is available, the SSE2 movemask intrinsic is used; otherwise
 *     movemask_pseudo is left undefined so that a fallback can be supplied later
 *     in this file.
 *   - AVX2 with HAVE_AVX512: 64-byte vectors, 64-bit mask (requires __AVX512BW__)
 *   - AVX2 only: 32-byte vectors, 32-bit mask
 *
 * Macros defined:
 *   VECTOR_BYTES     number of bytes in one vector
 *   zsv_mask_t       unsigned integer type with one bit per vector byte
 *   NEXT_BIT         find-first-set builtin wide enough for zsv_mask_t
 *   movemask_pseudo  vector -> zsv_mask_t (only where an intrinsic is available)
 */
#if !defined(__AVX2__) // -mavx2 compiler flag not present
#define ZSV_NO_AVX
#define zsv_mask_t uint16_t
#define VECTOR_BYTES 16
#define NEXT_BIT __builtin_ffs
#if defined(__AVX__)
#include <emmintrin.h>
#define movemask_pseudo(x) _mm_movemask_epi8((__m128i)(x))
#endif
#elif defined(HAVE_AVX512)
#ifndef __AVX512BW__
#error AVX512 requested, but __AVX512BW__ macro not defined
#else
#include <immintrin.h>
#define VECTOR_BYTES 64
#define zsv_mask_t uint64_t
#define movemask_pseudo(x) _mm512_movepi8_mask((__m512i)(x))
// the mask is 64 bits wide: __builtin_ffsl takes a long, which is only 32 bits
// on LLP64 targets (e.g. 64-bit Windows), so use the long long variant
#define NEXT_BIT __builtin_ffsll
#endif
#else // have avx2, not avx512
#include <immintrin.h>
#define VECTOR_BYTES 32
#define zsv_mask_t uint32_t
#define movemask_pseudo(x) _mm256_movemask_epi8((__m256i)(x))
#define NEXT_BIT __builtin_ffs
#endif
57
+
58
+ typedef unsigned char zsv_uc_vector __attribute__((vector_size(VECTOR_BYTES))); // compiler vector type: VECTOR_BYTES unsigned bytes
59
+
60
+ struct zsv_row { // cells collected so far for the row being parsed
61
+ size_t used, allocated, overflow; // used: cells stored in cells[]; allocated: capacity of cells[]; overflow: cells dropped because used reached allocated
62
+ struct zsv_cell *cells; // cell array; cell_dl() stores into cells[used++] while used < allocated
63
+ };
64
+
65
+ struct collate_header { // accumulator that collate_header_append() uses to merge successive rows column by column
66
+ struct {
67
+ unsigned char *buff; // heap buffer (grown with realloc) holding the merged text of every column back to back
68
+ size_t used; // number of bytes of buff in use
69
+ } buff;
70
+ size_t *lengths; // per-column byte count inside buff: each appended non-empty cell adds its length PLUS 1 (trailing separator)
71
+ size_t column_count; // largest cell count of any row appended so far
72
+ };
73
+
74
+ struct zsv_scan_delim_regs { // working variables of the delimited-text scanner; reachable through zsv_scanner.pull.regs. NOTE(review): the fields are consumed in zsv_scan_delim.c, not shown here -- confirm meanings there
75
+ size_t i;
76
+ size_t bytes_chunk_end;
77
+ size_t bytes_read;
78
+ char delimiter;
79
+ unsigned char c;
80
+ char skip_next_delim;
81
+ int quote;
82
+ size_t mask_total_offset;
83
+ zsv_mask_t mask; // zsv_mask_t is sized to hold one bit per byte of a VECTOR_BYTES-wide vector
84
+ int mask_last_start;
85
+ unsigned char location;
86
+ };
87
+
88
+ struct zsv_scan_fixed_regs { // fixed-width counterpart of zsv_scan_delim_regs; currently only a placeholder member
89
+ char xx; // to do: no real state is kept here yet
90
+ };
91
+
92
+ #ifdef ZSV_EXTRAS
93
+ #include <inttypes.h>
94
+ #include <sqlite3.h>
95
+
96
+ struct zsv_overwrite { // overwrite state embedded in zsv_scanner when ZSV_EXTRAS is defined
97
+ struct zsv_overwrite_data odata; // data record handed to next() by pointer
98
+ void *ctx; // opaque context passed as the first argument of next() and close()
99
+ enum zsv_status (*next)(void *ctx, struct zsv_overwrite_data *odata); // presumably loads the next overwrite into odata -- TODO confirm against the implementation
100
+ enum zsv_status (*close)(void *ctx); // presumably releases ctx -- TODO confirm against the implementation
101
+ };
102
+ #endif
103
+
104
+ struct zsv_scanner { // complete parser state; code in this file accesses it through zsv_parser handles (e.g. parser->row.used)
105
+ char last;
106
+ struct {
107
+ unsigned char *buff; // provided by caller
108
+ size_t size; // provided by caller
109
+ } buff;
110
+
111
+ size_t cell_start;
112
+ unsigned char quoted : 7; // bitfield of ZSV_PARSER_QUOTE_XXX flags for the current cell; zsv_clear_cell() resets it to 0
113
+ unsigned char buffer_exceeded : 1; // when set, cell_dl() skips its quote and malformed-UTF8 post-processing
114
+
115
+ unsigned char waiting_for_end : 1;
116
+ unsigned char checked_bom : 1;
117
+ unsigned char free_buff : 1;
118
+ unsigned char finished : 1;
119
+ unsigned char had_bom : 1;
120
+ unsigned char abort : 1; // when set, row_dl() returns zsv_status_cancelled instead of resetting the row
121
+ unsigned char have_cell : 1; // set by cell_dl(), cleared by row_dl() once the row has been delivered
122
+ unsigned char started : 1;
123
+
124
+ size_t quote_close_position; // offset of the closing quote relative to the cell start, as compared against the cell length in cell_dl()
125
+ struct zsv_opts opts;
126
+
127
+ size_t row_start;
128
+ struct zsv_row row; // cells of the row currently being assembled
129
+
130
+ size_t scanned_length;
131
+ size_t cum_scanned_length;
132
+ size_t partial_row_length;
133
+
134
+ size_t (*read)(void *buff, size_t n, size_t size, void *in);
135
+ void *in;
136
+
137
+ size_t (*filter)(void *ctx, unsigned char *buff, size_t bytes_read); // optional filter installed by zsv_set_scan_filter()
138
+ void *filter_ctx; // context passed to filter()
139
+
140
+ size_t buffer_end;
141
+ size_t old_bytes_read; // only non-zero if we must shift upon next parse_more()
142
+
143
+ const char *insert_string;
144
+
145
+ size_t empty_header_rows;
146
+
147
+ struct zsv_opts opts_orig;
148
+
149
+ #define ZSV_MODE_DELIM 0
150
+ #define ZSV_MODE_FIXED 1
151
+ #define ZSV_MODE_DELIM_PULL 2
152
+ unsigned char mode; // one of the ZSV_MODE_XXX values above; zsv_scan() dispatches on it
153
+ struct {
154
+ unsigned *offsets; // 0-based position of each cell end. offset[0] = end of first cell
155
+ unsigned count; // number of offsets
156
+ } fixed;
157
+
158
+ struct collate_header *collate_header;
159
+ size_t data_row_count; /* 0 = in header row; 1 = first data row */
160
+ struct zsv_cell (*get_cell)(zsv_parser parser, size_t ix);
161
+
162
+ struct {
163
+ union {
164
+ struct zsv_scan_delim_regs delim;
165
+ struct zsv_scan_fixed_regs fixed;
166
+ } *regs;
167
+ enum zsv_status stat; // last status
168
+ unsigned char *buff;
169
+ size_t bytes_read;
170
+ size_t row_used;
171
+ unsigned char now;
172
+ } pull;
173
+
174
+ int (*errprintf)(void *ctx, const char *format, ...); // error reporter; this file calls it as errprintf(errf, fmt, ...)
175
+ void *errf; // context argument handed to errprintf
176
+ int (*errclose)(void *ctx);
177
+
178
+ #ifdef ZSV_EXTRAS
179
+ struct {
180
+ size_t cum_row_count; /* total number of rows read */
181
+ time_t last_time; /* last time from which to check seconds_interval */
182
+ size_t max_rows; /* max rows to read, including header row(s) */
183
+ } progress;
184
+ struct zsv_overwrite overwrite;
185
+ #endif
186
+ };
187
+
188
+ void collate_header_destroy(struct collate_header **chp) {
189
+ if (*chp) {
190
+ struct collate_header *ch = *chp;
191
+ free(ch->buff.buff);
192
+ free(ch->lengths);
193
+ free(ch);
194
+ *chp = NULL;
195
+ }
196
+ }
197
+
198
+ /* collate_header_append(): return err */
199
+ static int collate_header_append(struct zsv_scanner *scanner, struct collate_header **chp) {
200
+ if (!*chp) {
201
+ if ((*chp = calloc(1, sizeof(struct collate_header))))
202
+ (*chp)->lengths = calloc(scanner->row.allocated, sizeof(*(*chp)->lengths));
203
+ if (!(*chp) || !(*chp)->lengths) {
204
+ free(*chp);
205
+ scanner->errprintf(scanner->errf, "Out of memory!\n");
206
+ return -1;
207
+ }
208
+ }
209
+ struct collate_header *ch = *chp;
210
+ size_t this_row_size = 0;
211
+ size_t column_count = zsv_cell_count(scanner);
212
+ for (size_t i = 0, j = column_count; i < j; i++) {
213
+ struct zsv_cell c = zsv_get_cell_1(scanner, i);
214
+ if (c.len)
215
+ this_row_size += c.len + 1; // +1: terminating null or delim
216
+ }
217
+ size_t new_row_size = ch->buff.used + this_row_size;
218
+ unsigned char *new_row = realloc(ch->buff.buff, new_row_size);
219
+ if (!new_row) {
220
+ scanner->errprintf(scanner->errf, "Out of memory!\n");
221
+ return -1;
222
+ }
223
+
224
+ // now: splice the new row into the old row, starting with the last cell
225
+ // e.g. prior row = A1.B1.C1.
226
+ // this row = A2.B2.C2.
227
+ // new_row = A1.B1.C1..........
228
+ // starting with last cell in this row, move the old data, then splice new:
229
+ // new_row = A1.B1.C1.......C2.
230
+ // new_row = A1.B1.C1....C1 C2.
231
+ // new_row = A1.B1.C1.B2.C1 C2.
232
+ // new_row = A1.B1.B1 B2.C1 C2.
233
+ // new_row = A1.A2.B1 B2.C1 C2.
234
+ // new_row = A1 A2.B1 B2.C1 C2.
235
+
236
+ size_t new_row_end = ch->buff.used + this_row_size;
237
+ size_t old_row_end = ch->buff.used;
238
+ ch->buff.used += this_row_size;
239
+ ch->buff.buff = new_row;
240
+ for (size_t i = column_count; i > 0; i--) {
241
+ struct zsv_cell c = zsv_get_cell_1(scanner, i - 1);
242
+ // copy new row's cell value to end
243
+ if (c.len) {
244
+ memcpy(new_row + new_row_end - c.len - 1, c.str, c.len);
245
+ new_row[new_row_end - 1] = ' ';
246
+ new_row_end = new_row_end - c.len - 1;
247
+ }
248
+
249
+ // move prior cell value
250
+ size_t old_cell_len = ch->lengths[i - 1]; // old_cell_len includes delim
251
+ if (old_cell_len) {
252
+ // need memmove, not memcpy
253
+ memmove(new_row + new_row_end - old_cell_len, new_row + old_row_end - old_cell_len, old_cell_len);
254
+ old_row_end -= old_cell_len;
255
+ new_row_end -= old_cell_len;
256
+ }
257
+ if (c.len)
258
+ ch->lengths[i - 1] += c.len + 1;
259
+ }
260
+ if (column_count > ch->column_count)
261
+ ch->column_count = column_count;
262
+ return 0;
263
+ }
264
+
265
+ __attribute__((always_inline)) static inline void zsv_clear_cell(struct zsv_scanner *scanner) {
266
+ scanner->quoted = 0;
267
+ }
268
+
269
+ // always_inline has a noticeable impact. do not remove without benchmarking!
270
+ __attribute__((always_inline)) static inline void cell_dl(struct zsv_scanner *scanner, unsigned char *s, size_t n) {
271
+ // handle quoting
272
+ if (VERY_LIKELY(!scanner->buffer_exceeded)) {
273
+ if (UNLIKELY(scanner->quoted > 0)) {
274
+ if (LIKELY(scanner->quote_close_position + 1 == n)) {
275
+ if (LIKELY((scanner->quoted & ZSV_PARSER_QUOTE_EMBEDDED) == 0)) {
276
+ // this is the easy and usual case: no embedded double-quotes
277
+ // just remove surrounding quotes from content
278
+ s++;
279
+ n -= 2;
280
+ } else { // embedded dbl-quotes to remove
281
+ s++;
282
+ n--;
283
+ // remove dbl-quotes. TO DO: consider adding option to skip this
284
+ for (size_t i = 0; i + 1 < n; i++) {
285
+ if (s[i] == '"' && s[i + 1] == '"') {
286
+ if (n > i + 2)
287
+ memmove(s + i + 1, s + i + 2, n - i - 2);
288
+ n--;
289
+ }
290
+ }
291
+ n--;
292
+ }
293
+ } else {
294
+ if (scanner->quote_close_position) {
295
+ // the first char was a quote, and we have content after the closing quote
296
+ // the solution below is a generalized on that will work
297
+ // for the easy and usual case, but by handling separately
298
+ // we avoid the memmove in the easy / usual case
299
+ memmove(s + 1, s, scanner->quote_close_position);
300
+ s += 2;
301
+ n -= 2;
302
+ if (UNLIKELY((scanner->quoted & ZSV_PARSER_QUOTE_EMBEDDED) != 0)) {
303
+ // remove dbl-quotes
304
+ for (size_t i = 0; i + 1 < n; i++) {
305
+ if (s[i] == '"' && s[i + 1] == '"') {
306
+ if (n > i + 2)
307
+ memmove(s + i + 1, s + i + 2, n - i - 2);
308
+ n--;
309
+ }
310
+ }
311
+ }
312
+ }
313
+ }
314
+ } else if (UNLIKELY(scanner->opts.delimiter != ',')) {
315
+ if (memchr(s, ',', n))
316
+ scanner->quoted = ZSV_PARSER_QUOTE_NEEDED;
317
+ }
318
+ // end quote handling
319
+
320
+ if (scanner->opts.malformed_utf8_replace) {
321
+ if (scanner->opts.malformed_utf8_replace < 0)
322
+ n = zsv_strencode(s, n, 0, NULL, NULL);
323
+ else
324
+ n = zsv_strencode(s, n, scanner->opts.malformed_utf8_replace, NULL, NULL);
325
+ }
326
+ }
327
+ if (UNLIKELY(scanner->opts.cell_handler != NULL))
328
+ scanner->opts.cell_handler(scanner->opts.ctx, s, n);
329
+ if (VERY_LIKELY(scanner->row.used < scanner->row.allocated)) {
330
+ struct zsv_row *row = &scanner->row;
331
+ struct zsv_cell c = {s, n, scanner->opts.no_quotes ? 1 : scanner->quoted, 0};
332
+ row->cells[row->used++] = c;
333
+ } else
334
+ scanner->row.overflow++;
335
+ scanner->have_cell = 1;
336
+
337
+ zsv_clear_cell(scanner);
338
+ }
339
+
340
+ __attribute__((always_inline)) static inline enum zsv_status row_dl(struct zsv_scanner *scanner) {
341
+ if (VERY_UNLIKELY(scanner->row.overflow)) {
342
+ scanner->errprintf(scanner->errf, "Warning: number of columns (%zu) exceeds row max (%zu)\n",
343
+ scanner->row.allocated + scanner->row.overflow, scanner->row.allocated);
344
+ scanner->row.overflow = 0;
345
+ }
346
+ if (VERY_LIKELY(scanner->opts.row_handler != NULL)) // TO DO: disallow row_handler to be null; if null, set to dummy
347
+ scanner->opts.row_handler(scanner->opts.ctx);
348
+ // Note: scanner->data_row_count will be incremented AFTER this call
349
+ // in order to accommodate pull parsing, in which case incrementing here
350
+ // would be too early
351
+ #ifdef ZSV_EXTRAS
352
+ scanner->progress.cum_row_count++;
353
+ if (VERY_UNLIKELY(scanner->opts.progress.rows_interval &&
354
+ scanner->progress.cum_row_count % scanner->opts.progress.rows_interval == 0)) {
355
+ char ok;
356
+ if (!scanner->opts.progress.seconds_interval)
357
+ ok = 1;
358
+ else {
359
+ // using timer_create() would be better, but is not currently supported on
360
+ // all platforms, so the fallback is to poll
361
+ time_t now = time(NULL);
362
+ if (now > scanner->progress.last_time &&
363
+ (unsigned int)(now - scanner->progress.last_time) >= scanner->opts.progress.seconds_interval) {
364
+ ok = 1;
365
+ scanner->progress.last_time = now;
366
+ } else
367
+ ok = 0;
368
+ }
369
+ if (ok && scanner->opts.progress.callback)
370
+ scanner->abort = scanner->opts.progress.callback(scanner->opts.progress.ctx, scanner->progress.cum_row_count);
371
+ #ifndef NDEBUG
372
+ if (scanner->abort)
373
+ scanner->errprintf(scanner->errf, "ZSV parsing aborted at %zu\n", scanner->progress.cum_row_count);
374
+ #endif
375
+ }
376
+ if (VERY_UNLIKELY(scanner->progress.max_rows > 0)) {
377
+ if (VERY_UNLIKELY(scanner->progress.cum_row_count == scanner->progress.max_rows)) {
378
+ scanner->abort = 1;
379
+ scanner->row.used = 0;
380
+ return zsv_status_max_rows_read;
381
+ }
382
+ }
383
+
384
+ #endif
385
+ if (VERY_UNLIKELY(scanner->abort))
386
+ return zsv_status_cancelled;
387
+ scanner->have_cell = 0;
388
+ scanner->row.used = 0;
389
+ return zsv_status_ok;
390
+ }
391
+
392
+ __attribute__((always_inline)) static inline enum zsv_status cell_and_row_dl(struct zsv_scanner *scanner,
393
+ unsigned char *s, size_t n) {
394
+ cell_dl(scanner, s, n);
395
+ return row_dl(scanner);
396
+ }
397
+
398
+ #ifndef movemask_pseudo
399
+ /*
400
+ provide our own pseudo-movemask, which sets the 1 bit for each corresponding
401
+ non-zero value in the vector (as opposed to real movemask which sets the bit
402
+ only for each corresponding non-zero highest-bit value in the vector)
403
+ */
404
+
405
+ #if defined(__EMSCRIPTEN__) && defined(__SSE2__)
406
+ #include <wasm_simd128.h>
407
+ #define movemask_pseudo(x) wasm_i8x16_bitmask(x)
408
+
409
+ #elif defined(__ARM_NEON) || defined(__ARM_NEON__)
410
+ #include <arm_neon.h>
411
+ static inline zsv_mask_t movemask_pseudo(zsv_uc_vector v) {
412
+ // see https://stackoverflow.com/questions/11870910/
413
+ static const uint8_t
414
+ __attribute__((aligned(16))) _powers[16] = {1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128};
415
+ uint8x16_t mm_powers = vld1q_u8(_powers);
416
+
417
+ // compute the mask from the input
418
+ uint64x2_t imask = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(v, mm_powers))));
419
+
420
+ // Get the resulting bytes
421
+ uint16_t mask;
422
+ vst1q_lane_u8((uint8_t *)&mask + 0, (uint8x16_t)imask, 0);
423
+ vst1q_lane_u8((uint8_t *)&mask + 1, (uint8x16_t)imask, 8);
424
+ return mask;
425
+ }
426
+
427
+ #elif defined(__SSE2__)
428
+
429
+ typedef char zsv_c_vector __attribute__((vector_size(VECTOR_BYTES)));
430
+ #define movemask_pseudo(x) __builtin_ia32_pmovmskb128((zsv_c_vector)x)
431
+
432
+ #else
433
+
434
+ // slow path
435
+ #if defined(__EMSCRIPTEN__)
436
+ #warning \
437
+ "Compiling with emscripten, without using SIMD. To use SIMD, compile with -msse2 -msimd128 -experimental-wasm-simd and -I/path/to/emsdk/upstream/lib/clang/16.0.0/include"
438
+ #endif
439
+
440
+ static inline zsv_mask_t movemask_pseudo(zsv_uc_vector v) {
441
+ zsv_mask_t mask = 0, tmp = 1;
442
+ for (size_t i = 0; i < sizeof(zsv_uc_vector); i++) {
443
+ mask |= (v[i] ? tmp : 0);
444
+ tmp <<= 1;
445
+ }
446
+
447
+ return mask;
448
+ }
449
+
450
+ #endif // __EMSCRIPTEN__
451
+ #endif // ndef movemask_pseudo
452
+
453
+ #include "vector_delim.c"
454
+
455
+ #ifdef ZSV_SUPPORT_PULL_PARSER
456
+ #undef ZSV_SUPPORT_PULL_PARSER
457
+ #endif
458
+ #define ZSV_SCAN_DELIM zsv_scan_delim
459
+ #include "zsv_scan_delim.c"
460
+ #undef ZSV_SCAN_DELIM
461
+ #undef scanner_last
462
+
463
+ #define ZSV_SUPPORT_PULL_PARSER 1
464
+ #define ZSV_SCAN_DELIM zsv_scan_delim_pull
465
+ #include "zsv_scan_delim.c"
466
+
467
+ #include "zsv_scan_fixed.c"
468
+
469
+ static enum zsv_status zsv_scan(struct zsv_scanner *scanner, unsigned char *buff, size_t bytes_read) {
470
+ switch (scanner->mode) {
471
+ case ZSV_MODE_FIXED:
472
+ return zsv_scan_fixed(scanner, buff, bytes_read);
473
+ case ZSV_MODE_DELIM_PULL:
474
+ // return zsv_status_row or zsv_status_ok (next call to parse_more)
475
+ return zsv_scan_delim_pull(scanner, buff, bytes_read);
476
+ default:
477
+ return zsv_scan_delim(scanner, buff, bytes_read);
478
+ }
479
+ }
480
+
481
+ #define ZSV_BOM "\xef\xbb\xbf"
482
+
483
+ // optional: set a filter function to filter data before it is processed
484
+ // function should return the number of bytes to process. this may be smaller
485
+ // but may not be larger than the original number of bytes, and any data modification
486
+ // must be done in-place to *buff
487
+ enum zsv_status zsv_set_scan_filter(struct zsv_scanner *scanner,
488
+ size_t (*filter)(void *ctx, unsigned char *buff, size_t bytes_read), void *ctx) {
489
+ scanner->filter = filter;
490
+ scanner->filter_ctx = ctx;
491
+ return zsv_status_ok;
492
+ }
493
+
494
+ static void apply_callbacks(struct zsv_scanner *scanner) {
495
+ if (UNLIKELY(scanner->opts.cell_handler != NULL)) {
496
+ // call the user-provided cell() callback on each cell
497
+ unsigned char saved_quoted = scanner->quoted;
498
+ for (size_t i = 0, j = zsv_cell_count(scanner); i < j; i++) {
499
+ struct zsv_cell c = zsv_get_cell_1(scanner, i);
500
+ scanner->quoted = c.quoted;
501
+ scanner->opts.cell_handler(scanner->opts.ctx, c.str, c.len);
502
+ }
503
+ scanner->quoted = saved_quoted;
504
+ }
505
+ // call the user-provided row() callback
506
+ if (VERY_LIKELY(scanner->opts.row_handler != NULL))
507
+ scanner->opts.row_handler(scanner->opts.ctx);
508
+ }
509
+
510
+ static void set_callbacks(struct zsv_scanner *scanner);
511
+
512
+ static char zsv_internal_row_is_blank(zsv_parser parser) {
513
+ for (unsigned int i = 0; i < parser->row.used; i++)
514
+ if (parser->row.cells[i].len)
515
+ return 0;
516
+ return 1;
517
+ }
518
+
519
+ static void skip_to_first_row_w_data(void *ctx) {
520
+ struct zsv_scanner *scanner = ctx;
521
+ if (LIKELY(zsv_internal_row_is_blank(scanner) == 0)) {
522
+ scanner->opts.keep_empty_header_rows = 1;
523
+ if (scanner->empty_header_rows) {
524
+ scanner->errprintf(scanner->errf, "Warning: skipped %zu empty header rows; suggest using:\n --skip-head %zu\n",
525
+ scanner->empty_header_rows, scanner->empty_header_rows + scanner->opts_orig.rows_to_ignore);
526
+ }
527
+ set_callbacks(scanner);
528
+ apply_callbacks(scanner);
529
+ } else // entire row was empty
530
+ scanner->empty_header_rows++;
531
+ }
532
+
533
+ static void ignore_header_rows(void *ctx) {
534
+ struct zsv_scanner *scanner = ctx;
535
+ if (scanner->opts.rows_to_ignore)
536
+ scanner->opts.rows_to_ignore--;
537
+ if (!scanner->opts.rows_to_ignore)
538
+ set_callbacks(scanner);
539
+ }
540
+
541
+ static void collate_header_row(void *ctx) {
542
+ struct zsv_scanner *scanner = ctx;
543
+ if (scanner->opts.header_span) {
544
+ --scanner->opts.header_span;
545
+
546
+ // save this row
547
+
548
+ // first, make sure this row has at least as many cells as the largest prior row
549
+ if (scanner->collate_header) {
550
+ for (size_t i = zsv_cell_count(scanner); i < scanner->row.allocated && i < scanner->collate_header->column_count;
551
+ i++)
552
+ memset(&scanner->row.cells[i], 0, sizeof(scanner->row.cells[i]));
553
+ scanner->row.used = scanner->collate_header->column_count;
554
+ }
555
+
556
+ if (collate_header_append(scanner, &scanner->collate_header))
557
+ scanner->abort = 1;
558
+ }
559
+
560
+ if (!scanner->opts.header_span) {
561
+ // finished with header; combine all rows into a single row
562
+ set_callbacks(scanner);
563
+ if (scanner->collate_header) {
564
+ size_t offset = 0;
565
+ for (size_t i = 0; i < scanner->collate_header->column_count; i++) {
566
+ size_t len_plus1 = scanner->collate_header->lengths[i];
567
+ scanner->row.cells[i].str = scanner->collate_header->buff.buff + offset;
568
+ if (len_plus1) {
569
+ scanner->row.cells[i].len = len_plus1 - 1;
570
+ scanner->row.cells[i].quoted = 1;
571
+ } else
572
+ scanner->row.cells[i].len = 0;
573
+ offset += len_plus1;
574
+ }
575
+ }
576
+
577
+ apply_callbacks(scanner);
578
+ if (scanner->mode != ZSV_MODE_DELIM_PULL)
579
+ collate_header_destroy(&scanner->collate_header);
580
+ }
581
+ }
582
+
583
+ static void set_callbacks(struct zsv_scanner *scanner) {
584
+ if (scanner->opts.rows_to_ignore) {
585
+ scanner->opts.row_handler = ignore_header_rows;
586
+ scanner->opts.cell_handler = NULL;
587
+ scanner->opts.ctx = scanner;
588
+ } else if (scanner->mode != ZSV_MODE_FIXED && !scanner->opts.keep_empty_header_rows) {
589
+ scanner->opts.row_handler = skip_to_first_row_w_data;
590
+ scanner->opts.cell_handler = NULL;
591
+ scanner->opts.ctx = scanner;
592
+ } else if (scanner->opts.header_span > 1) {
593
+ scanner->opts.row_handler = collate_header_row;
594
+ scanner->opts.cell_handler = NULL;
595
+ scanner->opts.ctx = scanner;
596
+ } else {
597
+ #ifdef ZSV_EXTRAS
598
+ if (scanner->overwrite.odata.have)
599
+ scanner->get_cell = zsv_get_cell_with_overwrite;
600
+ else
601
+ #endif
602
+ scanner->get_cell = zsv_get_cell_1;
603
+ scanner->data_row_count = 0;
604
+ scanner->opts.row_handler = scanner->opts_orig.row_handler;
605
+ scanner->opts.cell_handler = scanner->opts_orig.cell_handler;
606
+ scanner->opts.ctx = scanner->opts_orig.ctx;
607
+ }
608
+ }
609
+
610
+ static void zsv_throwaway_row(void *ctx) {
611
+ struct zsv_scanner *scanner = ctx;
612
+ if (scanner->opts.overflow_row_handler != NULL) {
613
+ if (zsv_cell_count(scanner) > 1 || zsv_get_cell_1(scanner, 0).len > 0)
614
+ scanner->opts.overflow_row_handler(ctx);
615
+ }
616
+ scanner->buffer_exceeded = 0;
617
+ set_callbacks(ctx);
618
+ }
619
+
620
+ #ifdef ZSV_EXTRAS
621
+
622
+ static int zsv_have_overwrite(zsv_parser parser, size_t row_ix, size_t col_ix) {
623
+ struct zsv_overwrite *overwrite = &parser->overwrite;
624
+ while (overwrite->odata.have && overwrite->odata.row_ix < row_ix)
625
+ overwrite->next(overwrite->ctx, &overwrite->odata);
626
+ while (overwrite->odata.have && overwrite->odata.row_ix == row_ix && overwrite->odata.col_ix < col_ix)
627
+ overwrite->next(overwrite->ctx, &overwrite->odata);
628
+ if (!overwrite->odata.have)
629
+ parser->get_cell = zsv_get_cell_1;
630
+ return overwrite->odata.have && overwrite->odata.row_ix == row_ix && overwrite->odata.col_ix == col_ix;
631
+ }
632
+
633
+ static struct zsv_cell zsv_get_cell_with_overwrite(zsv_parser parser, size_t col_ix) {
634
+ if (VERY_LIKELY(col_ix < parser->row.used)) {
635
+ size_t row_ix = parser->data_row_count;
636
+ if (!zsv_have_overwrite(parser, row_ix, col_ix))
637
+ return parser->row.cells[col_ix];
638
+
639
+ struct zsv_cell c = parser->overwrite.odata.val;
640
+ c.overwritten = 1;
641
+ return c;
642
+ }
643
+ struct zsv_cell c = {0, 0, 0, 0};
644
+ return c;
645
+ }
646
+ #endif
647
+
648
+ static int zsv_scanner_init(struct zsv_scanner *scanner, struct zsv_opts *opts) {
649
+ size_t need_buff_size = 0;
650
+ scanner->errprintf = opts->errprintf ? opts->errprintf : zsv_generic_fprintf;
651
+ scanner->errf = opts->errf ? opts->errf : stderr;
652
+ scanner->errclose = opts->errclose;
653
+ if (opts->malformed_utf8_replace == ZSV_MALFORMED_UTF8_DO_NOT_REPLACE)
654
+ opts->malformed_utf8_replace = 0;
655
+ if (opts->buffsize < opts->max_row_size * 2)
656
+ need_buff_size = opts->max_row_size * 2;
657
+ opts->delimiter = opts->delimiter ? opts->delimiter : ',';
658
+ if (opts->delimiter == '\n' || opts->delimiter == '\r' || opts->delimiter == '"') {
659
+ scanner->errprintf(scanner->errf, "warning: ignoring illegal delimiter\n");
660
+ opts->delimiter = ',';
661
+ }
662
+
663
+ if (opts->insert_header_row)
664
+ scanner->insert_string = opts->insert_header_row;
665
+
666
+ if (need_buff_size < ZSV_MIN_SCANNER_BUFFSIZE)
667
+ need_buff_size = ZSV_MIN_SCANNER_BUFFSIZE;
668
+ if (opts->buffsize < need_buff_size) {
669
+ if (opts->buffsize > 0) {
670
+ if (need_buff_size == ZSV_MIN_SCANNER_BUFFSIZE)
671
+ scanner->errprintf(scanner->errf, "Increasing --buff-size to minimum %zu\n", need_buff_size);
672
+ else
673
+ scanner->errprintf(scanner->errf, "Increasing --buff-size to %zu to accommmodate max-row-size of %u\n",
674
+ need_buff_size, opts->max_row_size);
675
+ }
676
+ opts->buffsize = need_buff_size;
677
+ }
678
+ if (opts->buffsize == 0)
679
+ opts->buffsize = ZSV_DEFAULT_SCANNER_BUFFSIZE;
680
+ else if (opts->buffsize < ZSV_MIN_SCANNER_BUFFSIZE)
681
+ opts->buffsize = ZSV_MIN_SCANNER_BUFFSIZE;
682
+
683
+ scanner->in = opts->stream;
684
+ if (!opts->read) {
685
+ scanner->read = (zsv_generic_read)fread;
686
+ if (!opts->stream)
687
+ scanner->in = stdin;
688
+ } else {
689
+ scanner->read = opts->read;
690
+ scanner->in = opts->stream;
691
+ }
692
+ scanner->buff.buff = opts->buff;
693
+ scanner->buff.size = opts->buffsize;
694
+
695
+ if (opts->buffsize && !opts->buff) {
696
+ scanner->buff.buff = malloc(opts->buffsize);
697
+ scanner->free_buff = 1;
698
+ }
699
+
700
+ #ifdef ZSV_EXTRAS
701
+ if (opts->max_rows)
702
+ scanner->progress.max_rows = opts->max_rows;
703
+ #endif
704
+ if (scanner->buff.buff) {
705
+ scanner->opts = *opts;
706
+ scanner->opts_orig = *opts;
707
+ if (!scanner->opts.max_columns)
708
+ scanner->opts.max_columns = 1024;
709
+ set_callbacks(scanner);
710
+ if ((scanner->row.allocated = scanner->opts.max_columns) &&
711
+ (scanner->row.cells = calloc(scanner->row.allocated, sizeof(*scanner->row.cells)))) {
712
+ #ifdef ZSV_EXTRAS
713
+ // initialize overwrites
714
+ if (scanner->opts.overwrite.open && !scanner->opts.overwrite.cancel) {
715
+ if (scanner->opts.overwrite.open(scanner->opts.overwrite.ctx) == zsv_status_ok) {
716
+ scanner->overwrite.odata.have = 1;
717
+ scanner->overwrite.next = scanner->opts.overwrite.next;
718
+ scanner->overwrite.close = scanner->opts.overwrite.close;
719
+ scanner->overwrite.ctx = scanner->opts.overwrite.ctx;
720
+ // load the first overwrite
721
+ scanner->overwrite.next(scanner->overwrite.ctx, &scanner->overwrite.odata);
722
+ return 0;
723
+ }
724
+ return 1;
725
+ }
726
+ #endif
727
+ return 0;
728
+ }
729
+ }
730
+ return 1;
731
+ }