zsv 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +36 -0
  3. data/LICENSE +21 -0
  4. data/README.md +311 -0
  5. data/ext/zsv/common.h +34 -0
  6. data/ext/zsv/extconf.rb +137 -0
  7. data/ext/zsv/options.c +126 -0
  8. data/ext/zsv/options.h +31 -0
  9. data/ext/zsv/options_internal.h +8 -0
  10. data/ext/zsv/parser.c +300 -0
  11. data/ext/zsv/parser.h +62 -0
  12. data/ext/zsv/row.c +122 -0
  13. data/ext/zsv/row.h +39 -0
  14. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
  15. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
  16. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
  17. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
  18. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
  19. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
  20. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
  21. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
  22. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
  23. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
  24. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
  25. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
  26. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
  27. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
  28. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
  29. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
  30. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
  31. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
  32. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
  33. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
  34. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
  35. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
  36. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
  37. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
  38. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
  39. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
  40. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
  41. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
  42. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
  43. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
  84. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
  85. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
  86. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
  87. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
  88. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
  89. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
  90. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
  91. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
  92. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
  93. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
  94. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
  95. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
  96. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
  97. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
  98. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
  99. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
  100. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
  106. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
  107. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
  108. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
  109. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
  110. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
  111. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
  112. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
  113. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
  141. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
  142. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
  143. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
  144. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
  145. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
  146. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
  147. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
  148. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
  182. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
  183. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
  184. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
  185. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
  186. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
  187. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
  188. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
  189. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
  190. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
  191. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
  192. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
  193. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
  194. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
  195. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
  196. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
  197. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
  224. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
  225. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
  226. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
  227. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
  228. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
  229. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
  230. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
  231. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
  232. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
  233. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
  234. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
  235. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
  236. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
  237. data/ext/zsv/zsv_ext.c +343 -0
  238. data/lib/zsv/version.rb +5 -0
  239. data/lib/zsv.rb +81 -0
  240. metadata +340 -0
@@ -0,0 +1,228 @@
1
+ #include <stdio.h>
2
+ #include <string.h>
3
+ #include <stdlib.h>
4
+ #include <unistd.h> // unlink()
5
+
6
+ #include <errno.h>
7
+ #include <zsv/utils/cache.h>
8
+ #include <zsv/utils/jq.h>
9
+
10
+ #ifndef APPNAME
11
+ #define APPNAME "cache"
12
+ #endif
13
+
14
+ #include <zsv/utils/os.h>
15
+ #include <zsv/utils/err.h>
16
+ #include <zsv/utils/dirs.h>
17
+ #include <zsv/utils/file.h>
18
+
19
+ static const char *zsv_cache_type_name(enum zsv_cache_type t) {
20
+ switch (t) {
21
+ case zsv_cache_type_property:
22
+ return ZSV_CACHE_PROPERTIES_NAME ".json";
23
+ case zsv_cache_type_tag:
24
+ return "tag.json";
25
+ case zsv_cache_type_overwrite:
26
+ return "overwrite.sqlite3";
27
+ default:
28
+ return NULL;
29
+ }
30
+ }
31
+
32
+ unsigned char *zsv_cache_filepath(const unsigned char *data_filepath, enum zsv_cache_type type, char create_dir,
33
+ char temp_file) {
34
+ if (!data_filepath || !*data_filepath)
35
+ return NULL;
36
+
37
+ const char *cache_filename_base = zsv_cache_type_name(type);
38
+ if (!cache_filename_base) {
39
+ zsv_printerr(ENOMEM, "Out of memory!");
40
+ return NULL;
41
+ }
42
+
43
+ unsigned char *cache_filename;
44
+ asprintf((char **)&cache_filename, "%s%s", cache_filename_base, temp_file ? ZSV_TEMPFILE_SUFFIX : "");
45
+
46
+ unsigned char *s = cache_filename ? zsv_cache_path(data_filepath, cache_filename, 0) : NULL;
47
+ if (s && create_dir) {
48
+ char *last_slash_s = (char *)strrchr((void *)s, FILESLASH);
49
+ int err = 0;
50
+
51
+ // temporarily truncate the string so as to only leave its parent folder
52
+ *last_slash_s = '\0';
53
+
54
+ // ensure the parent dir exists
55
+ if (!zsv_dir_exists((char *)s))
56
+ err = zsv_mkdirs((char *)s, 0);
57
+ if (err) {
58
+ fprintf(stderr, "Unable to create cache directory %s\n", s);
59
+ free(s);
60
+ s = NULL;
61
+ } else
62
+ *last_slash_s = FILESLASH;
63
+ }
64
+
65
+ free(cache_filename);
66
+ return s;
67
+ }
68
+
69
+ /*
70
+ * print cache file to stdout
71
+ */
72
+ int zsv_cache_print(const unsigned char *filepath, enum zsv_cache_type ctype, const unsigned char *default_value) {
73
+ int err = 0;
74
+ // to do: parse the json rather than just blindly regurgitating the file
75
+ unsigned char *cache_fn = zsv_cache_filepath(filepath, ctype, 0, 0);
76
+ if (cache_fn) {
77
+ FILE *f;
78
+ if (zsv_file_readable((char *)cache_fn, &err, &f)) {
79
+ char buff[1024];
80
+ size_t bytes;
81
+ while ((bytes = fread(buff, 1, sizeof(buff), f)))
82
+ fwrite(buff, 1, bytes, stdout);
83
+ fclose(f);
84
+ } else if (err == ENOENT) {
85
+ if (default_value)
86
+ printf("%s\n", default_value);
87
+ } else {
88
+ perror((const char *)cache_fn);
89
+ if (!err)
90
+ err = 1;
91
+ }
92
+ }
93
+ free(cache_fn);
94
+ return err;
95
+ }
96
+
97
+ /*
98
+ * remove a cache file
99
+ */
100
+
101
+ int zsv_cache_remove(const unsigned char *filepath, enum zsv_cache_type ctype) {
102
+ int err = 0;
103
+ unsigned char *fn = zsv_cache_filepath(filepath, ctype, 0, 0);
104
+ if (!fn)
105
+ err = ENOMEM;
106
+ else if (zsv_file_readable((const char *)fn, &err, NULL)) {
107
+ err = unlink((const char *)fn);
108
+ if (err)
109
+ perror((const char *)fn);
110
+ } else if (err == ENOENT)
111
+ err = 0; // file d.n. exist, nothing to do
112
+ else
113
+ perror((const char *)fn);
114
+ free(fn);
115
+ return err;
116
+ }
117
+
118
+ /*
119
+ * modify a JSON cache file, write to tmp file, then replace the cache file
120
+ */
121
+ int zsv_modify_cache_file(const unsigned char *filepath, enum zsv_cache_type ctype, const unsigned char *json_value1,
122
+ const unsigned char *json_value2, const unsigned char *filter) {
123
+ unsigned char *cache_fn = zsv_cache_filepath((const unsigned char *)filepath, ctype, 0, 0);
124
+ unsigned char *cache_tmp_fn = zsv_cache_filepath((const unsigned char *)filepath, ctype, 1, 1);
125
+ FILE *cache_data = NULL;
126
+ if (!(cache_fn && cache_tmp_fn))
127
+ return zsv_printerr(ENOMEM, "Out of memory!");
128
+
129
+ cache_data = zsv_fopen((void *)cache_fn, "rb");
130
+ int err = 0;
131
+ if (!cache_data) {
132
+ err = errno;
133
+ if (err == ENOENT)
134
+ err = 0;
135
+ else { // file exists but could not be opened
136
+ perror((const char *)cache_fn);
137
+ return err;
138
+ }
139
+ }
140
+
141
+ if (cache_data) {
142
+ // check that we have at least 1 byte of data
143
+ fseek(cache_data, 1, SEEK_SET);
144
+ if (!ftell(cache_data)) { // empty file; will use default value of "{}"
145
+ fclose(cache_data);
146
+ cache_data = NULL;
147
+ } else
148
+ fseek(cache_data, 0, SEEK_SET);
149
+ }
150
+
151
+ // jq filter to apply to [current_properties, id, value]
152
+ FILE *tmp = zsv_fopen((const char *)cache_tmp_fn, "wb");
153
+ if (!tmp) {
154
+ if (!(err = errno))
155
+ err = 1;
156
+ perror((const char *)cache_tmp_fn);
157
+ } else {
158
+ struct jv_to_json_ctx ctx = {0};
159
+ ctx.write1 = zsv_jq_fwrite1;
160
+ ctx.ctx = tmp;
161
+ ctx.flags = JV_PRINT_PRETTY | JV_PRINT_SPACE1;
162
+ enum zsv_jq_status jqstat;
163
+ void *jqh = zsv_jq_new(filter, jv_to_json_func, &ctx, &jqstat);
164
+ if (jqstat || !jqh)
165
+ err = zsv_printerr(-1, "Unable to initialize jq filter");
166
+ else if (!(jqstat = zsv_jq_parse(jqh, "[", 1))) {
167
+ if (cache_data)
168
+ jqstat = zsv_jq_parse_file(jqh, cache_data);
169
+ else
170
+ jqstat = zsv_jq_parse(jqh, "{}", 2);
171
+ if (!jqstat && !(jqstat = zsv_jq_parse(jqh, ",", 1)) &&
172
+ !(jqstat = zsv_jq_parse(jqh, json_value1, strlen((void *)json_value1))) &&
173
+ !(jqstat = zsv_jq_parse(jqh, ",", 1)) &&
174
+ !(jqstat = zsv_jq_parse(jqh, json_value2, strlen((void *)json_value2))) &&
175
+ !(jqstat = zsv_jq_parse(jqh, "]", 1)) && !(jqstat = zsv_jq_finish(jqh))) {
176
+ ;
177
+ }
178
+ }
179
+ zsv_jq_delete(jqh);
180
+
181
+ if (cache_data) {
182
+ fclose(cache_data);
183
+ cache_data = NULL;
184
+ }
185
+ fclose(tmp);
186
+
187
+ if (!jqstat && zsv_replace_file(cache_tmp_fn, cache_fn)) {
188
+ err = zsv_printerr(-1, "Unable to save %s: ", cache_fn);
189
+ zsv_perror(NULL);
190
+ }
191
+ }
192
+
193
+ if (cache_data)
194
+ fclose(cache_data);
195
+ free(cache_fn);
196
+ free(cache_tmp_fn);
197
+ return err;
198
+ }
199
+
200
+ /**
201
+ * Returns the folder or file path to the cache for a given data file
202
+ * Caller must free the returned result
203
+ */
204
+ unsigned char *zsv_cache_path(const unsigned char *data_filepath, const unsigned char *cache_filename, char temp_file) {
205
+ if (!data_filepath)
206
+ return NULL;
207
+ const unsigned char *last_slash = (void *)strrchr((void *)data_filepath, '/');
208
+ const unsigned char *last_backslash = (void *)strrchr((void *)data_filepath, '\\');
209
+ const unsigned char *dir_end = (!last_slash && !last_backslash ? NULL
210
+ : last_backslash > last_slash ? last_backslash
211
+ : last_slash);
212
+ char *s = NULL;
213
+ char *filename_suffix = NULL;
214
+ if (cache_filename)
215
+ asprintf(&filename_suffix, "%c%s%s", FILESLASH, cache_filename, temp_file ? ZSV_TEMPFILE_SUFFIX : "");
216
+
217
+ if (!dir_end) // file is in current dir
218
+ asprintf(&s, ZSV_CACHE_DIR "%c%s%s", FILESLASH, data_filepath, filename_suffix ? filename_suffix : "");
219
+ else if (dir_end[1]) {
220
+ asprintf(&s, "%.*s%c" ZSV_CACHE_DIR "%c%s%s", (int)(dir_end - data_filepath), data_filepath, FILESLASH, FILESLASH,
221
+ dir_end + 1, filename_suffix ? filename_suffix : "");
222
+ for (int i = 0; s && s[i]; i++)
223
+ if (s[i] != FILESLASH && (s[i] == '/' || s[i] == '\\'))
224
+ s[i] = FILESLASH;
225
+ }
226
+ free(filename_suffix);
227
+ return (unsigned char *)s;
228
+ }
@@ -0,0 +1,91 @@
1
+ #include <stdio.h>
2
+ #include <sys/types.h>
3
+ #include <fcntl.h>
4
+ #include <unistd.h>
5
+ #include <stdlib.h> // malloc/free
6
+ #include <errno.h> // error reporting
7
+
8
+ // Define a reasonable buffer size for the buffered copy
9
+ #define COPY_BUFFER_SIZE (1024 * 64) // 64KB
10
+
11
+ #ifdef _WIN32
12
+ // Windows target (via mingw64)
13
+ #include <io.h> // _get_osfhandle
14
+ #include <windows.h> // HANDLE, ReadFile, WriteFile
15
+ #else
16
+ // POSIX target (Linux/macOS)
17
+ #include <sys/stat.h> // fstat, stat
18
+ // #include <sys/uio.h> // macOS/BSD sendfile() definition
19
+ #ifdef __linux__
20
+ #include <sys/sendfile.h> // only on Linux
21
+ #endif
22
+ #endif
23
+
24
/*
 * Copy the contents of in_fd to out_fd (used to concatenate files) and return
 * the number of bytes written, or -1 on error. If possible, use zero-copy via
 * sendfile.
 *
 * Platform behavior:
 * - Windows: buffered ReadFile/WriteFile copy of `size` bytes starting at
 *   in_fd's current position; running out of input early returns -1
 * - Linux: sendfile() of up to `size` bytes starting at offset 0 of in_fd
 *   (an explicit offset is passed, so in_fd's own file position is not used);
 *   returns the bytes copied, which is less than `size` if in_fd is shorter
 * - other POSIX: buffered read/write from in_fd's current position until EOF;
 *   `size` is ignored
 */
long zsv_concatenate_copy(int out_fd, int in_fd, off_t size) {
  long total_written = 0;

#ifdef _WIN32
  // --- windows: buffered copy via native apis
  HANDLE hOut = (HANDLE)_get_osfhandle(out_fd);
  HANDLE hIn = (HANDLE)_get_osfhandle(in_fd);
  if (hOut == INVALID_HANDLE_VALUE || hIn == INVALID_HANDLE_VALUE)
    return -1;

  char *buffer = malloc(COPY_BUFFER_SIZE);
  if (!buffer)
    return -1;

  DWORD bytes_read, bytes_written;
  BOOL result;

  while (total_written < size) {
    DWORD bytes_to_read =
      (DWORD)((size - total_written < COPY_BUFFER_SIZE) ? (size - total_written) : COPY_BUFFER_SIZE);

    result = ReadFile(hIn, buffer, bytes_to_read, &bytes_read, NULL);
    if (!result || bytes_read == 0) {
      free(buffer);
      return -1;
    }

    result = WriteFile(hOut, buffer, bytes_read, &bytes_written, NULL);
    if (!result || bytes_written != bytes_read) {
      free(buffer);
      return -1;
    }

    total_written += bytes_written;
  }

  free(buffer);
  return total_written;

#elif defined(__linux__)
  // --- linux: zero-copy! ---
  // sendfile() may transfer fewer bytes than requested (it caps a single call
  // and can be cut short), so keep calling until `size` bytes went out
  const off_t max_per_call = 0x7ffff000; // also keeps the count within size_t
  off_t offset = 0;                      // advanced by sendfile() itself
  while (offset < size) {
    off_t remaining = size - offset;
    size_t count = (size_t)(remaining > max_per_call ? max_per_call : remaining);
    // sendfile: target_fd, source_fd, offset*, count
    ssize_t sent = sendfile(out_fd, in_fd, &offset, count);
    if (sent < 0) {
      if (errno == EINTR)
        continue;
      return -1;
    }
    if (sent == 0) // in_fd has fewer than `size` bytes
      break;
    total_written += (long)sent;
  }
  return total_written;

#else
  (void)(size);
  // --- generic posix fallback (buffered copy) ---
  char *buffer = malloc(COPY_BUFFER_SIZE);
  if (!buffer)
    return -1;

  ssize_t read_bytes;
  while ((read_bytes = read(in_fd, buffer, COPY_BUFFER_SIZE)) != 0) {
    if (read_bytes < 0) {
      if (errno == EINTR)
        continue;
      break; // reported as -1 below
    }
    // write() may accept only part of the buffer: loop until all of it is out
    ssize_t flushed = 0;
    while (flushed < read_bytes) {
      ssize_t w = write(out_fd, buffer + flushed, (size_t)(read_bytes - flushed));
      if (w < 0 && errno == EINTR)
        continue;
      if (w <= 0) {
        free(buffer);
        return -1;
      }
      flushed += w;
    }
    total_written += (long)read_bytes;
  }
  free(buffer);
  return (read_bytes == 0) ? total_written : -1;
#endif
}
@@ -0,0 +1,240 @@
1
+ // /src/app/utils/chunk.c: implements /src/app/utils/chunk.h
2
+
3
+ #include <sys/stat.h>
4
+ #include <stdio.h>
5
+ #include <stdlib.h>
6
+ #include <string.h>
7
+
8
+ #include "chunk.h"
9
+
10
/**
 * @brief Tells whether a character is one of the two line-ending characters.
 * @param c The character to test.
 * @return int 1 for line feed ('\n') or carriage return ('\r'), 0 for anything else.
 */
static int zsv_is_newline(char c) {
  switch (c) {
  case '\n':
  case '\r':
    return 1;
  default:
    return 0;
  }
}
18
+
19
+ /**
20
+ * @brief Scans forward from an initial offset to find the first position after a newline sequence.
21
+ *
22
+ * @param fp The open file pointer.
23
+ * @param initial_offset The starting point of the search (nominal boundary).
24
+ * @param boundary The absolute maximum file size (total_size).
25
+ * @param only_crlf If non-zero, only treat \r\n as a newline.
26
+ * @return zsv_file_pos The position after the newline sequence, or -1 if not found.
27
+ */
28
+ static zsv_file_pos zsv_find_chunk_start(FILE *fp, zsv_file_pos initial_offset, zsv_file_pos boundary, int only_crlf) {
29
+ char c;
30
+ // Seek to the initial offset.
31
+ if (fseek(fp, initial_offset, SEEK_SET) != 0) {
32
+ return -1; // Seek error
33
+ }
34
+
35
+ // Scan forward for the start of a newline sequence
36
+ while (ftell(fp) < boundary && fread(&c, 1, 1, fp) == 1) {
37
+ if (only_crlf) {
38
+ if (c == '\r') {
39
+ // We found a CR. Check immediately if the next char is LF.
40
+ char next;
41
+ if (ftell(fp) < boundary && fread(&next, 1, 1, fp) == 1) {
42
+ if (next == '\n') {
43
+ // Found \r\n sequence. The chunk starts immediately after.
44
+ return ftell(fp);
45
+ }
46
+ // The next char was NOT \n.
47
+ // We must rewind one byte so the loop processes 'next' correctly
48
+ // (in case 'next' is itself a \r starting a valid sequence).
49
+ fseek(fp, -1, SEEK_CUR);
50
+ }
51
+ }
52
+ } else {
53
+ if (zsv_is_newline(c)) {
54
+ // Found the start of a sequence. Scan past all consecutive newline characters.
55
+ zsv_file_pos position_after_newline = ftell(fp);
56
+
57
+ while (position_after_newline < boundary && fread(&c, 1, 1, fp) == 1) {
58
+ if (zsv_is_newline(c)) {
59
+ position_after_newline = ftell(fp); // Keep tracking position past the sequence
60
+ } else {
61
+ // Found the first non-newline character.
62
+ // The new start is at the current position (one byte past the last read)
63
+ // so we return the start of that character (ftell - 1).
64
+ return ftell(fp) - 1;
65
+ }
66
+ }
67
+ // If inner loop breaks due to EOF, return -1
68
+ return -1;
69
+ }
70
+ }
71
+ }
72
+
73
+ // Reached EOF/boundary without finding a valid split point
74
+ return -1;
75
+ }
76
+
77
+ static int zsv_read_first_line_at_offset(const char *filename, zsv_file_pos offset, char *buffer, size_t buf_size) {
78
+ FILE *fp = fopen(filename, "rb");
79
+ if (fp == NULL) {
80
+ perror("zsv_read_first_line_at_offset: Failed to open file");
81
+ return -1;
82
+ }
83
+
84
+ if (offset < 0 || fseek(fp, offset, SEEK_SET) != 0) {
85
+ fprintf(stderr, "zsv_read_first_line_at_offset: Error: Invalid offset or fseek failed at %lld\n",
86
+ (long long)offset);
87
+ fclose(fp);
88
+ return -1;
89
+ }
90
+
91
+ // Use fgets. It handles both \n and \r\n line endings appropriately.
92
+ if (fgets(buffer, (int)buf_size, fp) == NULL) {
93
+ if (feof(fp)) {
94
+ buffer[0] = '\0'; // Empty chunk
95
+ } else {
96
+ perror("zsv_read_first_line_at_offset: fgets failed");
97
+ fclose(fp);
98
+ return -1;
99
+ }
100
+ }
101
+
102
+ // Remove the trailing newline sequence (CRLF or LF) for clean output (DRY cleanup logic)
103
+ size_t len = strlen(buffer);
104
+ if (len > 0) {
105
+ // Check for LF
106
+ if (buffer[len - 1] == '\n') {
107
+ buffer[--len] = '\0';
108
+ }
109
+ // Check for CR (handles both bare CR and the CR in CRLF)
110
+ if (len > 0 && buffer[len - 1] == '\r') {
111
+ buffer[len - 1] = '\0';
112
+ }
113
+ }
114
+
115
+ fclose(fp);
116
+ return 0;
117
+ }
118
+
119
+ // --- Public Library Implementations ---
120
+
121
+ struct zsv_chunk_position *zsv_guess_file_chunks(const char *filename, uint64_t N, uint64_t min_size,
122
+ zsv_file_pos initial_offset
123
+ #ifndef ZSV_NO_ONLY_CRLF
124
+ ,
125
+ int only_crlf
126
+ #endif
127
+ ) {
128
+
129
+ #ifdef ZSV_NO_ONLY_CRLF
130
+ int only_crlf = 0;
131
+ #endif
132
+ if (N == 0)
133
+ return NULL;
134
+
135
+ // Open in binary mode ('rb') is crucial for accurate byte counts.
136
+ FILE *fp = fopen(filename, "rb");
137
+ if (fp == NULL) {
138
+ perror("zsv_guess_file_chunks: Failed to open file");
139
+ return NULL;
140
+ }
141
+
142
+ // 1. Get total file size using fstat()
143
+ struct stat st;
144
+ if (fstat(fileno(fp), &st) == -1) {
145
+ perror("zsv_guess_file_chunks: fstat failed");
146
+ fclose(fp);
147
+ return NULL;
148
+ }
149
+ zsv_file_pos total_size = (zsv_file_pos)st.st_size;
150
+ if (total_size < initial_offset) {
151
+ perror("zsv_guess_file_chunks: initial_offset exceeds file size");
152
+ fclose(fp);
153
+ return NULL;
154
+ }
155
+ total_size -= initial_offset;
156
+
157
+ if (total_size < (zsv_file_pos)min_size) {
158
+ fprintf(stderr, "file size too small for parallelization\n");
159
+ fclose(fp);
160
+ return NULL;
161
+ }
162
+
163
+ // Allocate memory for the N chunk positions
164
+ struct zsv_chunk_position *chunks = (struct zsv_chunk_position *)malloc(N * sizeof(*chunks));
165
+ if (chunks == NULL) {
166
+ perror("zsv_guess_file_chunks: malloc failed");
167
+ fclose(fp);
168
+ return NULL;
169
+ }
170
+
171
+ if (initial_offset)
172
+ fseek(fp, initial_offset, SEEK_SET);
173
+
174
+ zsv_file_pos base_size = total_size / N;
175
+ zsv_file_pos current_offset = initial_offset;
176
+
177
+ for (uint64_t i = 0; i < N; ++i) {
178
+ chunks[i].start = current_offset;
179
+
180
+ // Calculate the initial nominal boundary for this chunk
181
+ zsv_file_pos nominal_boundary = (i == N - 1) ? total_size : (zsv_file_pos)((i + 1) * base_size);
182
+
183
+ if (i < N - 1) {
184
+ // Adjust the boundary for all but the last chunk
185
+ // Pass the only_crlf flag down to the helper
186
+ zsv_file_pos new_start_offset = zsv_find_chunk_start(fp, nominal_boundary, total_size, only_crlf);
187
+
188
+ if (new_start_offset < 0) {
189
+ // Warning: Could not find a valid split after nominal boundary
190
+ // We use the nominal boundary, which might break a line
191
+ chunks[i].end = nominal_boundary - 1;
192
+ current_offset = nominal_boundary;
193
+ } else {
194
+ chunks[i].end = new_start_offset - 1;
195
+ current_offset = new_start_offset;
196
+ }
197
+ } else {
198
+ // The last chunk always ends at the total_size - 1 byte
199
+ chunks[i].end = total_size + initial_offset > 0 ? total_size + initial_offset - 1 : 0;
200
+ }
201
+
202
+ // Defensive check for inverted start/end
203
+ if (chunks[i].start > chunks[i].end && total_size > 0)
204
+ chunks[i].end = chunks[i].start;
205
+ }
206
+
207
+ fclose(fp);
208
+ return chunks;
209
+ }
210
+
211
// Release a chunk array; a NULL pointer is accepted and ignored
void zsv_free_chunks(struct zsv_chunk_position *chunks) {
  free(chunks); // free(NULL) does nothing, so no separate NULL test is needed
}
216
+
217
+ const char *zsv_chunk_status_str(enum zsv_chunk_status stat) {
218
+ switch (stat) {
219
+ case zsv_chunk_status_ok:
220
+ return NULL;
221
+ case zsv_chunk_status_no_file_input:
222
+ return "Parallelization requires a file input";
223
+ case zsv_chunk_status_overwrite:
224
+ return "Parallelization cannot be used with overwrite";
225
+ case zsv_chunk_status_max_rows:
226
+ return "Parallelization cannot be used with -L,--limit-rows";
227
+ }
228
+ return NULL;
229
+ }
230
+
231
+ enum zsv_chunk_status zsv_chunkable(const char *inputpath, struct zsv_opts *opts) {
232
+ if (!inputpath)
233
+ return zsv_chunk_status_no_file_input;
234
+ struct zsv_opt_overwrite o = {0};
235
+ if (memcmp(&opts->overwrite, &o, sizeof(o)) || opts->overwrite_auto)
236
+ return zsv_chunk_status_overwrite;
237
+ if (opts->max_rows)
238
+ return zsv_chunk_status_max_rows;
239
+ return zsv_chunk_status_ok;
240
+ }
@@ -0,0 +1,63 @@
1
+ #ifndef ZSV_CHUNK_H
2
+ #define ZSV_CHUNK_H
3
+
4
+ #include <zsv/common.h> // struct zsv_opts
5
+ #include <stddef.h> // For size_t
6
+ #include <stdint.h> // For uint64_t
7
+ #include <sys/types.h> // For off_t
8
+
9
+ typedef off_t zsv_file_pos;
10
+
11
+ // Define a struct to hold the (start, end) pair using the standard zsv_file_pos type
12
+ struct zsv_chunk_position { // one chunk of a file, described by a pair of byte offsets
13
+ zsv_file_pos start; // presumably the offset of the chunk's first byte -- TODO confirm against zsv_guess_file_chunks
14
+ zsv_file_pos end; // offset of the chunk's last byte (inclusive): the chunker sets it to one less than the following chunk's start offset
15
+ };
16
+
17
+ /**
18
+ * @brief Divide a file into N chunks for parallel processing.
19
+ *
20
+ * Scans the file to find N approximately equal sections, ensuring that
21
+ * chunk boundaries align with newline sequences so rows are not split.
22
+ *
23
+ * @param filename Path to the file to be chunked.
24
+ * @param N The target number of chunks.
25
+ * @param min_size The minimum file size required to attempt parallelization.
26
+ * @param initial_offset The byte offset to start chunking from (usually 0).
27
+ * @param only_crlf If non-zero, boundaries are split strictly on \r\n sequences.
28
+ * If zero, \r or \n are accepted as boundaries.
29
+ * @return struct zsv_chunk_position* An array of N chunk positions (must be freed by caller),
30
+ * or NULL if the file cannot be chunked or an error occurs.
31
+ */
32
+ struct zsv_chunk_position *zsv_guess_file_chunks(const char *filename, uint64_t N, uint64_t min_size,
33
+ zsv_file_pos initial_offset
34
+ #ifndef ZSV_NO_ONLY_CRLF
35
+ ,
36
+ int only_crlf
37
+ #endif
38
+ );
39
+
40
+ /**
41
+ * @brief Frees the memory allocated by zsv_guess_file_chunks. (DRY Cleanup)
42
+ * @param chunks The pointer to the allocated chunk array.
43
+ */
44
+ void zsv_free_chunks(struct zsv_chunk_position *chunks);
45
+
46
/**
 * Result of the zsv_chunkable() compatibility check.
 */
enum zsv_chunk_status {
  zsv_chunk_status_ok = 0,            // chunking can be used
  zsv_chunk_status_no_file_input = 1, // no input file path was supplied
  zsv_chunk_status_overwrite = 2,     // overwrite options are in effect
  zsv_chunk_status_max_rows = 3       // a row limit (-L,--limit-rows) is in effect
};
52
+
53
+ /**
54
+ * zsv_chunkable(): check if chunking is compatible with options; return chunk_status
55
+ */
56
+ enum zsv_chunk_status zsv_chunkable(const char *inputpath, struct zsv_opts *opts);
57
+
58
+ /**
59
+ * Convert zsv_chunk_status to string description
60
+ */
61
+ const char *zsv_chunk_status_str(enum zsv_chunk_status stat);
62
+
63
+ #endif // ZSV_CHUNK_H
@@ -0,0 +1,57 @@
1
+ /*
2
+ * Copyright (C) 2021 Liquidaty and the zsv/lib contributors
3
+ * All rights reserved
4
+ *
5
+ * This file is part of zsv/lib, distributed under the license defined at
6
+ * https://opensource.org/licenses/MIT
7
+ */
8
+
9
+ #include <zsv/utils/clock.h>
10
+
11
+ clock_t zsv_clock_begin; // clock() value captured by zsv_clocks_begin()
12
+ clock_t zsv_clock_in; // clock ticks accumulated inside zsv_fread_clock()
13
+ clock_t zsv_clock_out; // clock ticks accumulated inside zsv_fwrite_clock() and zsv_fflush_clock()
14
+ int i_tmp; // NOTE(review): not referenced anywhere in this file -- confirm whether other code uses it
15
+
16
+ size_t zsv_fread_clock(void *restrict ptr, size_t size, size_t nitems, FILE *restrict stream) {
17
+ clock_t clock_tmp = clock();
18
+ size_t sz = fread(ptr, size, nitems, stream);
19
+ zsv_clock_in += clock() - clock_tmp;
20
+ return sz;
21
+ }
22
+
23
+ size_t zsv_fwrite_clock(const void *restrict ptr, size_t size, size_t nitems, FILE *restrict stream) {
24
+ clock_t clock_tmp = clock();
25
+ size_t sz = fwrite(ptr, size, nitems, stream);
26
+ zsv_clock_out += clock() - clock_tmp;
27
+ return sz;
28
+ }
29
+
30
+ int zsv_fflush_clock(FILE *stream) {
31
+ clock_t clock_tmp = clock();
32
+ int i = fflush(stream);
33
+ zsv_clock_out += clock() - clock_tmp;
34
+ return i;
35
+ }
36
+
37
+ void zsv_clocks_begin(void) {
38
+ zsv_clock_in = zsv_clock_out = 0;
39
+ zsv_clock_begin = clock();
40
+ }
41
+
42
+ void zsv_clocks_end(void) {
43
+ clock_t clock_end = clock();
44
+ clock_t clock_total = clock_end - zsv_clock_begin;
45
+ clock_t clock_other = clock_total - zsv_clock_in - zsv_clock_out;
46
+ fprintf(stderr,
47
+ "elapsed time:\n"
48
+ " total %zu, %Lf\n"
49
+ " in %zu, %Lf\n"
50
+ " out %zu, %Lf\n"
51
+ " other %zu, %Lf\n"
52
+ "\n",
53
+ (size_t)(clock_total), (long double)(clock_total) / CLOCKS_PER_SEC, (size_t)zsv_clock_in,
54
+ (long double)(zsv_clock_in) / CLOCKS_PER_SEC, (size_t)zsv_clock_out,
55
+ (long double)(zsv_clock_out) / CLOCKS_PER_SEC, (size_t)clock_other,
56
+ (long double)(clock_other) / CLOCKS_PER_SEC);
57
+ }