zsv 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +36 -0
  3. data/LICENSE +21 -0
  4. data/README.md +311 -0
  5. data/ext/zsv/common.h +34 -0
  6. data/ext/zsv/extconf.rb +137 -0
  7. data/ext/zsv/options.c +126 -0
  8. data/ext/zsv/options.h +31 -0
  9. data/ext/zsv/options_internal.h +8 -0
  10. data/ext/zsv/parser.c +300 -0
  11. data/ext/zsv/parser.h +62 -0
  12. data/ext/zsv/row.c +122 -0
  13. data/ext/zsv/row.h +39 -0
  14. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
  15. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
  16. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
  17. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
  18. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
  19. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
  20. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
  21. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
  22. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
  23. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
  24. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
  25. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
  26. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
  27. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
  28. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
  29. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
  30. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
  31. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
  32. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
  33. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
  34. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
  35. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
  36. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
  37. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
  38. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
  39. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
  40. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
  41. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
  42. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
  43. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
  84. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
  85. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
  86. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
  87. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
  88. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
  89. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
  90. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
  91. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
  92. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
  93. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
  94. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
  95. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
  96. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
  97. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
  98. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
  99. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
  100. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
  106. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
  107. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
  108. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
  109. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
  110. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
  111. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
  112. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
  113. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
  141. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
  142. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
  143. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
  144. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
  145. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
  146. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
  147. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
  148. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
  182. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
  183. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
  184. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
  185. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
  186. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
  187. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
  188. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
  189. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
  190. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
  191. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
  192. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
  193. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
  194. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
  195. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
  196. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
  197. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
  224. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
  225. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
  226. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
  227. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
  228. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
  229. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
  230. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
  231. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
  232. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
  233. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
  234. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
  235. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
  236. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
  237. data/ext/zsv/zsv_ext.c +343 -0
  238. data/lib/zsv/version.rb +5 -0
  239. data/lib/zsv.rb +81 -0
  240. metadata +340 -0
@@ -0,0 +1,285 @@
1
+ #ifdef ZSV_SUPPORT_PULL_PARSER // save/restore helpers for the locals of ZSV_SCAN_DELIM; compiled only with pull-parser support
2
+
3
+ #define zsv_internal_save_reg(x) scanner->pull.regs->delim.x = x // store local `x` in scanner->pull.regs->delim.x
4
+ #define zsv_internal_save_regs(loc) \
5
+ do { \
6
+ scanner->pull.regs->delim.location = loc; /* 1 or 2 at the call sites; tested on re-entry to choose the resume label */ \
7
+ scanner->pull.buff = buff; \
8
+ scanner->pull.bytes_read = bytes_read; \
9
+ zsv_internal_save_reg(i); \
10
+ zsv_internal_save_reg(bytes_chunk_end); \
11
+ zsv_internal_save_reg(bytes_read); \
12
+ zsv_internal_save_reg(delimiter); \
13
+ zsv_internal_save_reg(c); \
14
+ zsv_internal_save_reg(skip_next_delim); \
15
+ zsv_internal_save_reg(quote); \
16
+ zsv_internal_save_reg(mask_total_offset); \
17
+ zsv_internal_save_reg(mask); \
18
+ zsv_internal_save_reg(mask_last_start); \
19
+ } while (0)
20
+
21
+ #define zsv_internal_restore_reg(x) x = scanner->pull.regs->delim.x // load local `x` back from scanner->pull.regs->delim.x
22
+ #define zsv_internal_restore_regs() \
23
+ do { \
24
+ buff = scanner->pull.buff; \
25
+ bytes_read = scanner->pull.bytes_read; \
26
+ zsv_internal_restore_reg(i); \
27
+ zsv_internal_restore_reg(bytes_chunk_end); \
28
+ zsv_internal_restore_reg(bytes_read); \
29
+ zsv_internal_restore_reg(delimiter); \
30
+ zsv_internal_restore_reg(c); \
31
+ zsv_internal_restore_reg(skip_next_delim); \
32
+ zsv_internal_restore_reg(quote); \
33
+ zsv_internal_restore_reg(mask_total_offset); \
34
+ zsv_internal_restore_reg(mask); \
35
+ zsv_internal_restore_reg(mask_last_start); \
36
+ memset(&v.dl, scanner->opts.delimiter, sizeof(zsv_uc_vector)); /* the v.* vectors are not saved, so they are refilled with the same values the normal entry path uses */ \
37
+ memset(&v.nl, '\n', sizeof(zsv_uc_vector)); \
38
+ memset(&v.cr, '\r', sizeof(zsv_uc_vector)); \
39
+ memset(&v.qt, scanner->opts.no_quotes > 0 ? 0 : '"', sizeof(v.qt)); \
40
+ } while (0)
41
+ #endif
42
+
43
+ static enum zsv_status ZSV_SCAN_DELIM(struct zsv_scanner *scanner, unsigned char *buff, size_t bytes_read) { // scans buff for delimiter, CR, LF and quote bytes; emits cells via cell_dl() and rows via cell_and_row_dl()
44
+ struct { // one vector per byte of interest; each is filled with that byte by the memset calls below
45
+ zsv_uc_vector dl;
46
+ zsv_uc_vector nl;
47
+ zsv_uc_vector cr;
48
+ zsv_uc_vector qt;
49
+ } v;
50
+
51
+ size_t i;
52
+ size_t bytes_chunk_end;
53
+ char delimiter;
54
+ unsigned char c;
55
+ char skip_next_delim;
56
+ int quote;
57
+ size_t mask_total_offset;
58
+ zsv_mask_t mask;
59
+ int mask_last_start;
60
+
61
+ #ifdef ZSV_SUPPORT_PULL_PARSER
62
+ if (scanner->pull.regs->delim.location) { // non-zero: an earlier call returned zsv_status_row mid-scan, so reload the locals and jump back in
63
+ zsv_internal_restore_regs();
64
+ if (scanner->pull.regs->delim.location == 1)
65
+ goto zsv_cell_and_row_dl_1;
66
+ goto zsv_cell_and_row_dl_2;
67
+ }
68
+ #endif
69
+ bytes_read += scanner->partial_row_length; // the scan range also counts partial_row_length bytes at the start of buff
70
+ i = scanner->partial_row_length; // scanning starts at that offset
71
+ skip_next_delim = 0;
72
+ bytes_chunk_end = bytes_read >= sizeof(zsv_uc_vector) ? bytes_read - sizeof(zsv_uc_vector) + 1 : 0; // indexes below this have at least sizeof(zsv_uc_vector) bytes left; 0 if the buffer is shorter than one vector
73
+ delimiter = scanner->opts.delimiter;
74
+ scanner->partial_row_length = 0;
75
+
76
+ // to do: move into one-time execution code?
77
+ // (but, will also locate away from function stack)
78
+ quote = scanner->opts.no_quotes > 0 ? -1 : '"'; // ascii code 34
79
+ memset(&v.dl, delimiter, sizeof(zsv_uc_vector)); // ascii code 44
80
+ memset(&v.nl, '\n', sizeof(zsv_uc_vector)); // ascii code 10
81
+ memset(&v.cr, '\r', sizeof(zsv_uc_vector)); // ascii code 13
82
+ memset(&v.qt, scanner->opts.no_quotes > 0 ? 0 : '"', sizeof(v.qt));
83
+
84
+ if (scanner->quoted & ZSV_PARSER_QUOTE_PENDING) {
85
+ // if we're here, then the last chunk we read ended with a lone quote char inside
86
+ // a quoted cell, and we are waiting to find out whether it is followed by
87
+ // another dbl-quote e.g. if the end of the last chunk is |, we had:
88
+ // ...,"hel"|"o"
89
+ // ...,"hel"|,...
90
+ // ...,"hel"|p,...
91
+ scanner->quoted -= ZSV_PARSER_QUOTE_PENDING;
92
+ if (buff[i] != quote) {
93
+ scanner->quoted |= ZSV_PARSER_QUOTE_CLOSED;
94
+ scanner->quoted &= ~ZSV_PARSER_QUOTE_UNCLOSED; // scanner->quoted -= ZSV_PARSER_QUOTE_UNCLOSED;
95
+ scanner->quote_close_position = i - scanner->cell_start - 1;
96
+ } else {
97
+ scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED;
98
+ scanner->quoted |= ZSV_PARSER_QUOTE_EMBEDDED;
99
+ i++;
100
+ }
101
+ }
102
+
103
+ #define scanner_last (i ? buff[i - 1] : scanner->last) // byte before position i; falls back to scanner->last when i is 0
104
+
105
+ mask_total_offset = 0;
106
+ mask = 0;
107
+ scanner->buffer_end = bytes_read;
108
+ for (; i < bytes_read; i++) {
109
+ if (UNLIKELY(mask == 0)) {
110
+ mask_last_start = i; // bit positions later taken from mask are applied relative to this index
111
+ if (VERY_LIKELY(i < bytes_chunk_end)) {
112
+ // keep going until we get a delim or we are at the eof
113
+ mask_total_offset = vec_delims(buff + i, bytes_read - i, &v.dl, &v.nl, &v.cr, &v.qt, &mask);
114
+ if (LIKELY(mask_total_offset != 0)) {
115
+ i += mask_total_offset;
116
+ if (VERY_UNLIKELY(mask == 0 && i == bytes_read))
117
+ break; // vector processing ended on exactly our buffer end
118
+ }
119
+ } else if (skip_next_delim) { // no vector scan at this index: drop the single byte flagged by skip_next_delim
120
+ skip_next_delim = 0;
121
+ continue;
122
+ }
123
+ }
124
+ if (VERY_LIKELY(mask)) {
125
+ size_t next_offset = NEXT_BIT(mask);
126
+ i = mask_last_start + next_offset - 1; // move i to the byte selected by NEXT_BIT(mask); that bit is cleared on the next line
127
+ mask = clear_lowest_bit(mask);
128
+ if (VERY_UNLIKELY(skip_next_delim)) {
129
+ skip_next_delim = 0;
130
+ continue;
131
+ }
132
+ }
133
+
134
+ // to do: consolidate csv and tsv/scanner->delimiter parsers
135
+ c = buff[i];
136
+ if (LIKELY(c == delimiter)) { // case ',':
137
+ if ((scanner->quoted & ZSV_PARSER_QUOTE_UNCLOSED) == 0) {
138
+ scanner->scanned_length = i;
139
+ cell_dl(scanner, buff + scanner->cell_start, i - scanner->cell_start);
140
+ scanner->cell_start = i + 1;
141
+ c = 0;
142
+ continue; // this char is not part of the cell content
143
+ } else
144
+ // we are inside an open quote, which is needed to escape this char
145
+ scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED;
146
+ } else if (UNLIKELY(c == '\r')) {
147
+ #ifndef ZSV_NO_ONLY_CRLF
148
+ if (VERY_UNLIKELY(scanner->opts.only_crlf_rowend)) {
149
+ if (scanner->quoted & ZSV_PARSER_QUOTE_PENDING_LF)
150
+ // if we already had a lone \r in this cell,
151
+ // flip the flag to ZSV_PARSER_QUOTE_NEEDED
152
+ scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED;
153
+ else
154
+ // otherwise this is the first \r in this cell,
155
+ // so set ZSV_PARSER_QUOTE_PENDING_LF, which
156
+ // will be removed if the next char is LF
157
+ scanner->quoted |= ZSV_PARSER_QUOTE_PENDING_LF;
158
+ continue;
159
+ }
160
+ #endif
161
+ if ((scanner->quoted & ZSV_PARSER_QUOTE_UNCLOSED) == 0) {
162
+ scanner->scanned_length = i;
163
+ enum zsv_status stat = cell_and_row_dl(scanner, buff + scanner->cell_start, i - scanner->cell_start);
164
+ if (VERY_UNLIKELY(stat))
165
+ return stat;
166
+ #ifdef ZSV_SUPPORT_PULL_PARSER
167
+ if (scanner->pull.now) {
168
+ scanner->pull.now = 0;
169
+ scanner->row.used = scanner->pull.row_used;
170
+ zsv_internal_save_regs(1); // the next call resumes at zsv_cell_and_row_dl_1
171
+ return zsv_status_row;
172
+ }
173
+ zsv_cell_and_row_dl_1: // reached by falling through, or by goto on re-entry when location == 1
174
+ scanner->row.used = 0;
175
+ scanner->pull.regs->delim.location = 0;
176
+ #endif
177
+ scanner->cell_start = i + 1;
178
+ scanner->row_start = i + 1;
179
+ scanner->data_row_count++;
180
+ continue; // this char is not part of the cell content
181
+ } else
182
+ // we are inside an open quote, which is needed to escape this char
183
+ scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED;
184
+ } else if (UNLIKELY(c == '\n')) {
185
+ if ((scanner->quoted & ZSV_PARSER_QUOTE_UNCLOSED) == 0) {
186
+ int is_crlf = (scanner_last == '\r');
187
+
188
+ // Handle logic for when we should SKIP this char (not a row end)
189
+ #ifndef ZSV_NO_ONLY_CRLF
190
+ if (VERY_UNLIKELY(scanner->opts.only_crlf_rowend)) {
191
+ if (!is_crlf) {
192
+ scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED;
193
+ continue; // only-crlf mode: ignore lone \n
194
+ } else
195
+ // remove ZSV_PARSER_QUOTE_PENDING_LF if we have it
196
+ scanner->quoted &= ~ZSV_PARSER_QUOTE_PENDING_LF;
197
+ } else
198
+ #endif
199
+ {
200
+ if (is_crlf) {
201
+ // Standard mode: ignore \n because \r already handled the row end
202
+ scanner->cell_start = i + 1;
203
+ scanner->row_start = i + 1;
204
+ continue;
205
+ }
206
+ }
207
+
208
+ // If we reached here, this is a row end
209
+ scanner->scanned_length = i;
210
+
211
+ // Calculate cell length. In only-crlf mode, we must exclude the preceding \r
212
+ size_t cell_len = i - scanner->cell_start;
213
+ #ifndef ZSV_NO_ONLY_CRLF
214
+ if (VERY_UNLIKELY(scanner->opts.only_crlf_rowend))
215
+ cell_len--;
216
+ #endif
217
+ enum zsv_status stat = cell_and_row_dl(scanner, buff + scanner->cell_start, cell_len);
218
+ if (VERY_UNLIKELY(stat))
219
+ return stat;
220
+ #ifdef ZSV_SUPPORT_PULL_PARSER
221
+ if (scanner->pull.now) {
222
+ scanner->pull.now = 0;
223
+ scanner->row.used = scanner->pull.row_used;
224
+ zsv_internal_save_regs(2); // the next call resumes at zsv_cell_and_row_dl_2
225
+ return zsv_status_row;
226
+ }
227
+ zsv_cell_and_row_dl_2: // reached by falling through, or by goto on re-entry when location is non-zero and not 1
228
+ scanner->row.used = 0;
229
+ scanner->pull.regs->delim.location = 0;
230
+ #endif
231
+ scanner->cell_start = i + 1;
232
+ scanner->row_start = i + 1;
233
+ scanner->data_row_count++;
234
+ continue; // this char is not part of the cell content
235
+ } else
236
+ // we are inside an open quote, which is needed to escape this char
237
+ scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED;
238
+ } else if (LIKELY(c == quote)) {
239
+ if (i == scanner->cell_start && !scanner->buffer_exceeded) {
240
+ scanner->quoted = ZSV_PARSER_QUOTE_UNCLOSED;
241
+ scanner->quote_close_position = 0;
242
+ c = 0;
243
+ } else if (scanner->quoted & ZSV_PARSER_QUOTE_UNCLOSED) {
244
+ // the cell started with a quote that is not yet closed
245
+ if (VERY_LIKELY(i + 1 < bytes_read)) {
246
+ if (LIKELY(buff[i + 1] != quote)) {
247
+ // buff[i] is the closing quote (not an escaped quote)
248
+ scanner->quoted |= ZSV_PARSER_QUOTE_CLOSED;
249
+ scanner->quoted -= ZSV_PARSER_QUOTE_UNCLOSED;
250
+
251
+ // keep track of closing quote position to handle the edge case
252
+ // where content follows the closing quote e.g. cell content is:
253
+ // "this-cell"-did-not-need-quotes
254
+ if (LIKELY(scanner->quote_close_position == 0))
255
+ scanner->quote_close_position = i - scanner->cell_start;
256
+ } else {
257
+ // next char is also '"'
258
+ // e.g. cell content is: "this "" is a dbl quote"
259
+ // cursor is here => ^
260
+ // include in cell content and don't further process
261
+ scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED;
262
+ scanner->quoted |= ZSV_PARSER_QUOTE_EMBEDDED;
263
+ skip_next_delim = 1; // tells the loop to pass over the second quote of the pair
264
+ }
265
+ } else // we are at the end of this input chunk
266
+ scanner->quoted |= ZSV_PARSER_QUOTE_PENDING;
267
+ } else {
268
+ // cell_length > 0 and cell did not start w quote, so
269
+ // we have a quote in middle of an unquoted cell
270
+ // process as a normal char
271
+ scanner->quoted |= ZSV_PARSER_QUOTE_EMBEDDED;
272
+ scanner->quote_close_position = scanner->quoted & ZSV_PARSER_QUOTE_CLOSED ? scanner->quote_close_position : 0;
273
+ }
274
+ }
275
+ }
276
+ scanner->scanned_length = i;
277
+
278
+ // save bytes_read-- we will need to shift any remaining partial row
279
+ // before we read next from our input. however, we intentionally refrain
280
+ // from doing this until the next parse_more() call, so that the entirety
281
+ // of all rows parsed thus far are still available until that next call
282
+ scanner->old_bytes_read = bytes_read;
283
+
284
+ return zsv_status_ok;
285
+ }
@@ -0,0 +1,88 @@
1
+ static inline char row_fx(struct zsv_scanner *scanner, unsigned char *buff, size_t row_start, size_t row_end) {
2
+ size_t cell_start = row_start;
3
+ size_t row_length = row_end - row_start;
4
+ for (unsigned i = 0; i < scanner->fixed.count; i++) {
5
+ size_t cell_end = row_start + (scanner->fixed.offsets[i] > row_length ? row_length : scanner->fixed.offsets[i]);
6
+ size_t cell_length = cell_end - cell_start;
7
+ unsigned char *s = buff + cell_start;
8
+ if (UNLIKELY(scanner->opts.cell_handler != NULL))
9
+ scanner->opts.cell_handler(scanner->opts.ctx, s, cell_length);
10
+ struct zsv_cell c = {s, cell_length, 1, 0};
11
+ scanner->row.cells[scanner->row.used++] = c;
12
+
13
+ cell_start = cell_end;
14
+ }
15
+ if (VERY_LIKELY(scanner->opts.row_handler != NULL))
16
+ scanner->opts.row_handler(scanner->opts.ctx);
17
+ scanner->row.used = 0;
18
+ return scanner->abort;
19
+ }
20
+
21
+ static enum zsv_status zsv_scan_fixed(struct zsv_scanner *scanner, unsigned char *buff, size_t bytes_read) {
22
+ bytes_read += scanner->partial_row_length;
23
+ unsigned char c;
24
+ size_t bytes_chunk_end = bytes_read >= sizeof(zsv_uc_vector) ? bytes_read - sizeof(zsv_uc_vector) + 1 : 0;
25
+
26
+ scanner->partial_row_length = 0;
27
+
28
+ // dl_v and qt_v are unused, we just leave them to reuse vec_delims()
29
+ zsv_uc_vector dl_v;
30
+ memset(&dl_v, 0, sizeof(zsv_uc_vector));
31
+ zsv_uc_vector nl_v;
32
+ memset(&nl_v, '\n', sizeof(zsv_uc_vector));
33
+ zsv_uc_vector cr_v;
34
+ memset(&cr_v, '\r', sizeof(zsv_uc_vector));
35
+ zsv_uc_vector qt_v;
36
+ memset(&qt_v, 0, sizeof(zsv_uc_vector));
37
+ size_t mask_total_offset = 0;
38
+ zsv_mask_t mask = 0;
39
+ int mask_last_start = 0;
40
+
41
+ scanner->buffer_end = bytes_read;
42
+ for (size_t i = scanner->partial_row_length;; i++) {
43
+ if (UNLIKELY(mask == 0)) {
44
+ mask_last_start = i;
45
+ if (LIKELY(i < bytes_chunk_end)) {
46
+ // keep going until we get a delim or we are at the eof
47
+ mask_total_offset = vec_delims(buff + i, bytes_read - i, &dl_v, &nl_v, &cr_v, &qt_v, &mask);
48
+ if (mask_total_offset)
49
+ i += mask_total_offset;
50
+ } else { // we only have a few bytes left, so manually parse
51
+ for (unsigned i2 = i; i2 < bytes_read; i2++)
52
+ if (strchr("\n\r", buff[i2]))
53
+ mask += 1 << (i2 - i);
54
+ }
55
+ if (UNLIKELY(mask == 0))
56
+ break;
57
+ }
58
+
59
+ size_t next_offset = __builtin_ffs(mask);
60
+ i = mask_last_start + next_offset - 1;
61
+ mask = clear_lowest_bit(mask);
62
+
63
+ c = buff[i];
64
+ if (LIKELY(c == '\n')) {
65
+ if (scanner_last == '\r') { // ignore; we are outside a cell and last char was rowend
66
+ scanner->row_start = i + 1;
67
+ } else {
68
+ // this is a row end
69
+ scanner->scanned_length = i;
70
+ if (VERY_UNLIKELY(row_fx(scanner, buff, scanner->row_start, i)))
71
+ return zsv_status_cancelled; // abort
72
+ scanner->row_start = i + 1;
73
+ }
74
+ } else if (UNLIKELY(c == '\r')) {
75
+ scanner->scanned_length = i;
76
+ if (VERY_UNLIKELY(row_fx(scanner, buff, scanner->row_start, i)))
77
+ return zsv_status_cancelled;
78
+ scanner->row_start = i + 1;
79
+ }
80
+ }
81
+
82
+ // save bytes_read-- we will need to shift any remaining partial row
83
+ // before we read next from our input. however, we intentionally refrain
84
+ // from doing this until the next parse_more() call, so that the entirety
85
+ // of all rows parsed thus far are still available until that next call
86
+ scanner->old_bytes_read = bytes_read;
87
+ return zsv_status_ok;
88
+ }
@@ -0,0 +1,51 @@
1
+ /*
2
+ * zsv_strencode(): standalone file to allow zsv utilities that use this
3
+ * to be used on a standalone basis without the zsv parser
4
+ *
5
+ * This file is part of zsv/lib, distributed under the license defined at
6
+ * https://opensource.org/licenses/MIT
7
+ */
8
+
9
+ #include <zsv/utils/utf8.h>
10
+ #include <zsv/utils/compiler.h>
11
+
12
+ /**
13
+ * Ensure valid UTF8 encoding by, if needed, replacing malformed bytes
14
+ */
15
+ ZSV_EXPORT
16
+ size_t zsv_strencode(unsigned char *s, size_t n, unsigned char replace,
17
+ int (*malformed_handler)(void *, const unsigned char *s, size_t n, size_t offset),
18
+ void *handler_ctx) {
19
+ size_t new_len = 0;
20
+ int clen;
21
+ for (size_t i2 = 0; i2 < n; i2 += (size_t)clen) {
22
+ clen = ZSV_UTF8_CHARLEN(s[i2]);
23
+ if (LIKELY(clen == 1))
24
+ s[new_len++] = s[i2];
25
+ else if (UNLIKELY(clen < 0) || UNLIKELY(i2 + clen > n)) {
26
+ if (malformed_handler)
27
+ malformed_handler(handler_ctx, s, n, new_len);
28
+ if (replace)
29
+ s[new_len++] = replace;
30
+ clen = 1;
31
+ } else { /* might be valid multi-byte utf8; check */
32
+ unsigned char valid_n;
33
+ for (valid_n = 1; valid_n < clen; valid_n++)
34
+ if (!ZSV_UTF8_SUBSEQUENT_CHAR_OK(s[i2 + valid_n]))
35
+ break;
36
+ if (valid_n == clen) { /* valid_n utf8; copy it */
37
+ memmove(s + new_len, s + i2, clen);
38
+ new_len += clen;
39
+ } else { /* invalid; valid_n smaller than expected */
40
+ if (malformed_handler)
41
+ malformed_handler(handler_ctx, s, n, new_len);
42
+ if (replace) {
43
+ memset(s + new_len, replace, valid_n);
44
+ new_len += valid_n;
45
+ }
46
+ clen = valid_n;
47
+ }
48
+ }
49
+ }
50
+ return new_len; // new length
51
+ }