zsv 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +36 -0
  3. data/LICENSE +21 -0
  4. data/README.md +311 -0
  5. data/ext/zsv/common.h +34 -0
  6. data/ext/zsv/extconf.rb +137 -0
  7. data/ext/zsv/options.c +126 -0
  8. data/ext/zsv/options.h +31 -0
  9. data/ext/zsv/options_internal.h +8 -0
  10. data/ext/zsv/parser.c +300 -0
  11. data/ext/zsv/parser.h +62 -0
  12. data/ext/zsv/row.c +122 -0
  13. data/ext/zsv/row.h +39 -0
  14. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
  15. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
  16. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
  17. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
  18. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
  19. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
  20. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
  21. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
  22. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
  23. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
  24. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
  25. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
  26. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
  27. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
  28. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
  29. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
  30. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
  31. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
  32. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
  33. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
  34. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
  35. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
  36. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
  37. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
  38. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
  39. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
  40. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
  41. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
  42. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
  43. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
  84. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
  85. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
  86. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
  87. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
  88. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
  89. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
  90. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
  91. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
  92. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
  93. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
  94. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
  95. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
  96. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
  97. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
  98. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
  99. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
  100. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
  106. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
  107. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
  108. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
  109. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
  110. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
  111. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
  112. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
  113. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
  141. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
  142. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
  143. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
  144. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
  145. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
  146. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
  147. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
  148. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
  182. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
  183. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
  184. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
  185. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
  186. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
  187. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
  188. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
  189. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
  190. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
  191. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
  192. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
  193. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
  194. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
  195. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
  196. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
  197. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
  224. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
  225. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
  226. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
  227. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
  228. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
  229. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
  230. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
  231. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
  232. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
  233. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
  234. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
  235. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
  236. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
  237. data/ext/zsv/zsv_ext.c +343 -0
  238. data/lib/zsv/version.rb +5 -0
  239. data/lib/zsv.rb +81 -0
  240. metadata +340 -0
@@ -0,0 +1,404 @@
1
+ /*
2
+ * Copyright (C) 2021 Liquidaty and the zsv/lib contributors
3
+ * All rights reserved
4
+ *
5
+ * This file is part of zsv/lib, distributed under the license defined at
6
+ * https://opensource.org/licenses/MIT
7
+ */
8
+
9
+ #include <stdio.h>
10
+ #include <string.h>
11
+ #include <stdlib.h>
12
+ #include <pthread.h>
13
+ #include <sys/types.h> // off_t
14
+
15
+ #define ZSV_COMMAND count
16
+ #include "zsv_command.h"
17
+ #include <zsv/utils/file.h>
18
+ #include <zsv/utils/os.h> // zsv_get_number_of_cores
19
+ #include "utils/chunk.h"
20
+
21
+ #define ZSV_COUNT_PARALLEL_MIN_BYTES (1024 * 1024 * 2)
22
+
23
/* Work item and result record for one chunk of a parallel row count */
struct zsv_chunk_count_data {
  unsigned int id; /* chunk index, assigned when the chunk is set up */

  /* byte range of the input file assigned to this chunk */
  size_t start_offset, end_offset;

  /* results written while the chunk is processed */
  size_t actual_next_row_start; /* absolute offset at which the following chunk should begin */
  size_t row_count;             /* rows counted in this chunk */
  int status;                   /* 0 on success, zsv_status_error on failure */

  /* inputs supplied by the thread that sets the chunk up */
  const char *input_path;         /* file the chunk processor opens for itself */
  struct zsv_opts *opts_template; /* parser options the chunk processor copies */

  int skip; /* set to 1 when the assigned range was empty and nothing was parsed */
};
37
+
38
/* Bookkeeping for a parallel count: one chunk record and one thread handle per chunk */
struct zsv_count_parallel_data {
  /* number of elements allocated in each of the two arrays below */
  unsigned int chunk_count;
  /* per-chunk inputs and results */
  struct zsv_chunk_count_data *chunks;
  /* thread handles, indexed the same way as chunks */
  pthread_t *threads;
};
43
+
44
+ struct data {
45
+ zsv_parser parser;
46
+ size_t rows;
47
+
48
+ struct zsv_opts *opts;
49
+ const char *input_path;
50
+ unsigned int num_chunks;
51
+
52
+ int run_in_parallel;
53
+ int cancelled;
54
+ #ifndef ZSV_NO_PARALLEL
55
+ struct zsv_count_parallel_data *pdata;
56
+ size_t end_offset_limit; // where this chunk (chunk 0) should stop
57
+ size_t next_row_start; // where chunk 0 actually ended
58
+ #endif
59
+ };
60
+
61
+ #ifndef ZSV_NO_PARALLEL
62
+ static void *process_chunk_internal(struct zsv_chunk_count_data *cdata);
63
+
64
+ static struct zsv_count_parallel_data *parallel_data_new(unsigned int count) {
65
+ struct zsv_count_parallel_data *pd = calloc(1, sizeof(*pd));
66
+ if (!pd)
67
+ return NULL;
68
+ pd->chunk_count = count;
69
+ pd->chunks = calloc(count, sizeof(*pd->chunks));
70
+ pd->threads = calloc(count, sizeof(*pd->threads));
71
+ if (!pd->chunks || !pd->threads) {
72
+ free(pd->chunks);
73
+ free(pd->threads);
74
+ free(pd);
75
+ return NULL;
76
+ }
77
+ return pd;
78
+ }
79
+
80
+ static void parallel_data_delete(struct zsv_count_parallel_data *pd) {
81
+ if (pd) {
82
+ free(pd->chunks);
83
+ free(pd->threads);
84
+ free(pd);
85
+ }
86
+ }
87
+
88
+ #endif
89
+
90
+ /* serial (non-parallelized) row handlers */
91
+ static void row_verbose(void *ctx) {
92
+ struct data *data = ctx;
93
+ data->rows++;
94
+ if (data->rows % 1000000 == 0)
95
+ fprintf(stderr, "Processed %zu data rows\n", data->rows / 1000000);
96
+ }
97
+
98
+ static void row_simple(void *ctx) {
99
+ ((struct data *)ctx)->rows++;
100
+ }
101
+
102
+ #ifndef ZSV_NO_PARALLEL
103
+ /* parallelized row handlers */
104
+ static void row_parallel_done(void *ctx) {
105
+ struct data *data = ctx;
106
+ // Find start of the next row
107
+ data->next_row_start = zsv_cum_scanned_length(data->parser) - zsv_row_length_raw_bytes(data->parser);
108
+ zsv_abort(data->parser);
109
+ data->cancelled = 1;
110
+ }
111
+
112
+ static void row_parallel(void *ctx) {
113
+ struct data *data = ctx;
114
+ data->rows++;
115
+
116
+ if (UNLIKELY((off_t)zsv_cum_scanned_length(data->parser) >= data->end_offset_limit)) {
117
+ // We crossed the boundary. We must finish this row, then stop.
118
+ // Switch handler to 'done' to catch the exact end of this row.
119
+ zsv_set_row_handler(data->parser, row_parallel_done);
120
+ }
121
+ }
122
+
123
+ struct worker_ctx {
124
+ struct zsv_chunk_count_data *cdata;
125
+ zsv_parser parser;
126
+ size_t limit_len;
127
+ int cancelled;
128
+ };
129
+
130
+ static void worker_row_done(void *ctx) {
131
+ struct worker_ctx *wctx = ctx;
132
+ // Calculate absolute offset of the *next* row start
133
+ size_t scanned = zsv_cum_scanned_length(wctx->parser);
134
+ wctx->cdata->actual_next_row_start = wctx->cdata->start_offset + scanned - zsv_row_length_raw_bytes(wctx->parser);
135
+ zsv_abort(wctx->parser);
136
+ wctx->cancelled = 1;
137
+ }
138
+
139
+ static void worker_row(void *ctx) {
140
+ struct worker_ctx *wctx = ctx;
141
+ wctx->cdata->row_count++;
142
+
143
+ if (UNLIKELY((off_t)zsv_cum_scanned_length(wctx->parser) >= wctx->limit_len)) {
144
+ zsv_set_row_handler(wctx->parser, worker_row_done);
145
+ }
146
+ }
147
+
148
/* pthread entry point: `arg` is the struct zsv_chunk_count_data to process */
static void *process_chunk_thread(void *arg) {
  return process_chunk_internal((struct zsv_chunk_count_data *)arg);
}
152
+
153
+ static void *process_chunk_internal(struct zsv_chunk_count_data *cdata) {
154
+ cdata->row_count = 0;
155
+ cdata->status = 0;
156
+
157
+ if (cdata->start_offset >= cdata->end_offset) {
158
+ cdata->actual_next_row_start = cdata->start_offset;
159
+ cdata->skip = 1;
160
+ return NULL;
161
+ }
162
+
163
+ struct zsv_opts opts = *cdata->opts_template;
164
+ struct worker_ctx wctx = {0};
165
+ wctx.cdata = cdata;
166
+ wctx.limit_len = cdata->end_offset - cdata->start_offset;
167
+
168
+ FILE *f = fopen(cdata->input_path, "rb");
169
+ if (!f) {
170
+ cdata->status = zsv_status_error;
171
+ return NULL;
172
+ }
173
+
174
+ if (fseeko(f, cdata->start_offset, SEEK_SET) != 0) {
175
+ fclose(f);
176
+ cdata->status = zsv_status_error;
177
+ return NULL;
178
+ }
179
+
180
+ opts.stream = f;
181
+ opts.ctx = &wctx;
182
+ opts.row_handler = worker_row;
183
+
184
+ wctx.parser = zsv_new(&opts);
185
+ if (wctx.parser == NULL) {
186
+ fclose(f);
187
+ cdata->status = zsv_status_error;
188
+ return NULL;
189
+ }
190
+
191
+ enum zsv_status status = zsv_status_ok;
192
+ while (status == zsv_status_ok && !wctx.cancelled) {
193
+ status = zsv_parse_more(wctx.parser);
194
+ }
195
+
196
+ // if finished naturally (eof)
197
+ if (!wctx.cancelled) {
198
+ cdata->actual_next_row_start = cdata->start_offset + zsv_cum_scanned_length(wctx.parser);
199
+ }
200
+
201
+ zsv_finish(wctx.parser);
202
+ zsv_delete(wctx.parser);
203
+ fclose(f);
204
+ return NULL;
205
+ }
206
+ #endif
207
+
208
+ static void header_handler(void *ctx) {
209
+ struct data *data = ctx;
210
+ #ifndef ZSV_NO_PARALLEL
211
+ if (data->input_path && data->num_chunks > 1) {
212
+ size_t header_end = zsv_cum_scanned_length(data->parser);
213
+ struct zsv_chunk_position *offsets =
214
+ zsv_guess_file_chunks(data->input_path, data->num_chunks, ZSV_COUNT_PARALLEL_MIN_BYTES, header_end
215
+ #ifndef ZSV_NO_ONLY_CRLF
216
+ ,
217
+ data->opts->only_crlf_rowend
218
+ #endif
219
+ );
220
+
221
+ if (offsets) {
222
+ data->pdata = parallel_data_new(data->num_chunks);
223
+ if (!data->pdata) {
224
+ fprintf(stderr, "Out of memory!\n");
225
+ zsv_free_chunks(offsets);
226
+ } else {
227
+ data->run_in_parallel = 1;
228
+ if (data->opts->verbose) {
229
+ for (unsigned int i = 0; i < data->num_chunks; i++) {
230
+ fprintf(stderr, "Chunk %i: %zu - %zu\n", i + 1, offsets[i].start, offsets[i].end);
231
+ }
232
+ }
233
+
234
+ /* set up worker chunks (1..n) */
235
+ for (unsigned int i = 1; i < data->num_chunks; i++) {
236
+ struct zsv_chunk_count_data *c = &data->pdata->chunks[i];
237
+ c->id = i;
238
+ c->start_offset = offsets[i].start;
239
+ c->end_offset = offsets[i].end;
240
+ c->input_path = data->input_path;
241
+ c->opts_template = data->opts;
242
+
243
+ if (pthread_create(&data->pdata->threads[i], NULL, process_chunk_thread, c) != 0) {
244
+ fprintf(stderr, "Error creating thread %d\n", i);
245
+ data->run_in_parallel = 0;
246
+ break;
247
+ }
248
+ }
249
+
250
+ if (data->run_in_parallel) {
251
+ data->end_offset_limit = offsets[0].end;
252
+ zsv_set_row_handler(data->parser, row_parallel);
253
+ data->run_in_parallel = 1;
254
+ }
255
+ }
256
+ zsv_free_chunks(offsets);
257
+ }
258
+ }
259
+ #endif
260
+
261
+ if (!data->run_in_parallel) { // single-threaded serial run
262
+ data->run_in_parallel = 0;
263
+ zsv_set_row_handler(data->parser, data->opts->verbose ? row_verbose : row_simple);
264
+ }
265
+ }
266
+
267
/* Print the command's usage text, followed by a blank line, to stdout; always returns 0 */
static int count_usage(void) {
  static const char usage_text[] = "Usage: count [options]\n"
                                   "\n"
                                   "Options:\n"
                                   " -h,--help : show usage\n"
                                   " -i,--input <filename> : use specified file input\n"
#ifndef ZSV_NO_PARALLEL
                                   " -j,--jobs <n> : number of jobs (parallel threads)\n"
                                   " --parallel : use all available cores\n"
#endif
    ;
  /* puts() appends the same trailing newline that the "%s\n" format produced */
  puts(usage_text);
  return 0;
}
281
+
282
+ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *optsp,
283
+ struct zsv_prop_handler *custom_prop_handler) {
284
+ struct data data = {0};
285
+ struct zsv_opts opts = *optsp;
286
+ data.opts = &opts;
287
+
288
+ int err = 0;
289
+ for (int i = 1; !err && i < argc; i++) {
290
+ const char *arg = argv[i];
291
+ if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
292
+ count_usage();
293
+ goto count_done;
294
+ }
295
+ if (!strcmp(arg, "-i") || !strcmp(arg, "--input") || *arg != '-') {
296
+ err = 1;
297
+ if ((!strcmp(arg, "-i") || !strcmp(arg, "--input")) && ++i >= argc)
298
+ fprintf(stderr, "%s option requires a filename\n", arg);
299
+ else {
300
+ if (opts.stream)
301
+ fprintf(stderr, "Input may not be specified more than once\n");
302
+ else if (!(opts.stream = fopen(argv[i], "rb")))
303
+ fprintf(stderr, "Unable to open for reading: %s\n", argv[i]);
304
+ else {
305
+ data.input_path = argv[i];
306
+ err = 0;
307
+ }
308
+ }
309
+ #ifndef ZSV_NO_PARALLEL
310
+ } else if (!strcmp(arg, "-j") || !strcmp(arg, "--jobs")) {
311
+ if (++i >= argc)
312
+ err = 1;
313
+ else
314
+ data.num_chunks = atoi(argv[i]);
315
+ } else if (!strcmp(arg, "--parallel")) {
316
+ data.num_chunks = zsv_get_number_of_cores();
317
+ if (data.num_chunks < 2) {
318
+ fprintf(stderr, "Warning: --parallel specified but only one core found; using -j 4 instead");
319
+ data.num_chunks = 4;
320
+ }
321
+ #endif
322
+ } else {
323
+ fprintf(stderr, "Unrecognized option: %s\n", arg);
324
+ err = 1;
325
+ }
326
+ }
327
+
328
+ #ifdef NO_STDIN
329
+ if (!opts.stream || opts.stream == stdin) {
330
+ fprintf(stderr, "Please specify an input file\n");
331
+ err = 1;
332
+ }
333
+ #endif
334
+ #ifndef ZSV_NO_PARALLEL
335
+ if (data.num_chunks > 1) {
336
+ enum zsv_chunk_status chstat = zsv_chunkable(data.input_path, &opts);
337
+ if (chstat != zsv_chunk_status_ok) {
338
+ fprintf(stderr, "%s\n", zsv_chunk_status_str(chstat));
339
+ err = 1;
340
+ }
341
+ }
342
+ #endif
343
+ if (!err) {
344
+ opts.row_handler = header_handler;
345
+ opts.ctx = &data;
346
+
347
+ if (zsv_new_with_properties(&opts, custom_prop_handler, data.input_path, &data.parser) != zsv_status_ok) {
348
+ fprintf(stderr, "Unable to initialize parser\n");
349
+ err = 1;
350
+ } else {
351
+ enum zsv_status status;
352
+
353
+ /* Main Parse Loop */
354
+ while (!data.cancelled && (status = zsv_parse_more(data.parser)) == zsv_status_ok)
355
+ ;
356
+ zsv_finish(data.parser);
357
+
358
+ #ifndef ZSV_NO_PARALLEL
359
+ if (data.run_in_parallel) {
360
+ if (!data.next_row_start)
361
+ // not likely to get here but just in case
362
+ data.next_row_start = zsv_cum_scanned_length(data.parser);
363
+
364
+ size_t total_rows = data.rows;
365
+ // aggregate results
366
+ for (unsigned int i = 1; i < data.num_chunks; i++) {
367
+ pthread_join(data.pdata->threads[i], NULL);
368
+
369
+ struct zsv_chunk_count_data *prev_chunk = (i == 1) ? NULL : &data.pdata->chunks[i - 1];
370
+ struct zsv_chunk_count_data *curr_chunk = &data.pdata->chunks[i];
371
+
372
+ // determine where the previous chunk actually ended
373
+ size_t prev_end = (i == 1) ? data.next_row_start : prev_chunk->actual_next_row_start;
374
+ // check overlap
375
+ if (prev_end > curr_chunk->start_offset) {
376
+ if (data.opts->verbose) {
377
+ fprintf(stderr, "Overlap detected at chunk %u (expected %zu, got %zu). Reprocessing.\n", i,
378
+ curr_chunk->start_offset, prev_end);
379
+ }
380
+ // reprocess synchronously
381
+ curr_chunk->start_offset = prev_end;
382
+ process_chunk_internal(curr_chunk);
383
+ }
384
+
385
+ total_rows += curr_chunk->row_count;
386
+ }
387
+
388
+ printf("%zu\n", total_rows);
389
+ parallel_data_delete(data.pdata);
390
+
391
+ } else
392
+ #endif
393
+ // result from running serially
394
+ printf("%zu\n", data.rows);
395
+ zsv_delete(data.parser);
396
+ }
397
+ }
398
+
399
+ count_done:
400
+ if (opts.stream && opts.stream != stdin)
401
+ fclose(opts.stream);
402
+
403
+ return err;
404
+ }