zsv 1.3.1 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/README.md +41 -3
  4. data/ext/zsv/extconf.rb +1 -1
  5. data/lib/zsv/version.rb +1 -1
  6. metadata +6 -226
  7. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +0 -756
  8. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +0 -381
  9. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +0 -228
  10. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +0 -123
  11. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +0 -39
  12. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +0 -104
  13. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +0 -41
  14. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +0 -1
  15. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +0 -14
  16. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +0 -19
  17. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +0 -116
  18. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +0 -194
  19. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +0 -796
  20. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +0 -41
  21. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +0 -16
  22. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +0 -280
  23. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +0 -36
  24. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +0 -913
  25. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +0 -23
  26. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +0 -20
  27. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +0 -140
  28. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +0 -91
  29. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +0 -81
  30. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +0 -82
  31. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +0 -404
  32. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +0 -569
  33. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +0 -365
  34. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +0 -366
  35. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +0 -341
  36. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +0 -263
  37. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +0 -298
  38. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +0 -157
  39. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +0 -177
  40. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +0 -444
  41. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +0 -145
  42. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +0 -110
  43. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +0 -15
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +0 -64
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +0 -1955
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +0 -6802
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +0 -230517
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +0 -12174
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +0 -2
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +0 -142
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +0 -49
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +0 -485
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +0 -1015
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +0 -663
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +0 -85
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +0 -75
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +0 -167
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +0 -228
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +0 -186
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +0 -23
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +0 -76
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +0 -167
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +0 -238
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +0 -186
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +0 -184
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +0 -52
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +0 -34
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +0 -103
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +0 -57
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +0 -69
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +0 -220
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +0 -34
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +0 -362
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +0 -764
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +0 -117
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +0 -508
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +0 -78
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +0 -505
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +0 -7
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +0 -59
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +0 -208
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +0 -795
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +0 -28
  84. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +0 -851
  85. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +0 -106
  86. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +0 -6
  87. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +0 -113
  88. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +0 -90
  89. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +0 -295
  90. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +0 -175
  91. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +0 -693
  92. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +0 -980
  93. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +0 -131
  94. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +0 -130
  95. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +0 -118
  96. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +0 -45
  97. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +0 -41
  98. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +0 -107
  99. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +0 -20
  100. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +0 -61
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +0 -14
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +0 -192
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +0 -72
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +0 -812
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +0 -753
  106. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +0 -372
  107. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +0 -15
  108. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +0 -119
  109. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +0 -45
  110. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +0 -63
  111. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +0 -12
  112. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +0 -166
  113. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +0 -214
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +0 -128
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +0 -43
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +0 -81
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +0 -25
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +0 -325
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +0 -73
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +0 -203
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +0 -7
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +0 -318
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +0 -134
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +0 -119
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +0 -322
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +0 -203
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +0 -36
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +0 -167
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +0 -36
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +0 -153
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +0 -32
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +0 -312
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +0 -29
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +0 -266
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +0 -9
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +0 -60
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +0 -1007
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +0 -453
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +0 -101
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +0 -49
  141. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +0 -393
  142. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +0 -322
  143. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +0 -228
  144. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +0 -91
  145. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +0 -240
  146. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +0 -63
  147. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +0 -57
  148. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +0 -148
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +0 -2
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +0 -427
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +0 -253
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +0 -121
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +0 -20
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +0 -159
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +0 -24
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +0 -180
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +0 -256
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +0 -197
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +0 -49
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +0 -400
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +0 -120
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +0 -18
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +0 -132
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +0 -178
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +0 -258
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +0 -246
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +0 -123
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +0 -153
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +0 -54
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +0 -267
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +0 -53
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +0 -357
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +0 -83
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +0 -33
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +0 -184
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +0 -292
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +0 -259
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +0 -13
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +0 -255
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +0 -96
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +0 -361
  182. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +0 -40
  183. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +0 -16
  184. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +0 -44
  185. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +0 -3
  186. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +0 -100
  187. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +0 -143
  188. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +0 -89
  189. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +0 -123
  190. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +0 -16
  191. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +0 -336
  192. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +0 -361
  193. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +0 -62
  194. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +0 -113
  195. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +0 -73
  196. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +0 -329
  197. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +0 -90
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +0 -49
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +0 -36
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +0 -58
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +0 -19
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +0 -147
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +0 -22
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +0 -28
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +0 -22
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +0 -17
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +0 -99
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +0 -65
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +0 -19
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +0 -19
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +0 -13
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +0 -54
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +0 -71
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +0 -53
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +0 -107
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +0 -18
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +0 -11
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +0 -148
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +0 -41
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +0 -25
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +0 -101
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +0 -33
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +0 -20
  224. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +0 -60
  225. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +0 -484
  226. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +0 -731
  227. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +0 -285
  228. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +0 -88
  229. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +0 -51
@@ -1,404 +0,0 @@
1
- /*
2
- * Copyright (C) 2021 Liquidaty and the zsv/lib contributors
3
- * All rights reserved
4
- *
5
- * This file is part of zsv/lib, distributed under the license defined at
6
- * https://opensource.org/licenses/MIT
7
- */
8
-
9
- #include <stdio.h>
10
- #include <string.h>
11
- #include <stdlib.h>
12
- #include <pthread.h>
13
- #include <sys/types.h> // off_t
14
-
15
- #define ZSV_COMMAND count
16
- #include "zsv_command.h"
17
- #include <zsv/utils/file.h>
18
- #include <zsv/utils/os.h> // zsv_get_number_of_cores
19
- #include "utils/chunk.h"
20
-
21
- #define ZSV_COUNT_PARALLEL_MIN_BYTES (1024 * 1024 * 2)
22
-
23
- struct zsv_chunk_count_data {
24
- unsigned int id;
25
- size_t start_offset;
26
- size_t end_offset;
27
-
28
- size_t actual_next_row_start;
29
- size_t row_count;
30
- int status;
31
-
32
- const char *input_path;
33
- struct zsv_opts *opts_template;
34
-
35
- int skip;
36
- };
37
-
38
- struct zsv_count_parallel_data {
39
- unsigned int chunk_count;
40
- struct zsv_chunk_count_data *chunks;
41
- pthread_t *threads;
42
- };
43
-
44
- struct data {
45
- zsv_parser parser;
46
- size_t rows;
47
-
48
- struct zsv_opts *opts;
49
- const char *input_path;
50
- unsigned int num_chunks;
51
-
52
- int run_in_parallel;
53
- int cancelled;
54
- #ifndef ZSV_NO_PARALLEL
55
- struct zsv_count_parallel_data *pdata;
56
- size_t end_offset_limit; // where this chunk (chunk 0) should stop
57
- size_t next_row_start; // where chunk 0 actually ended
58
- #endif
59
- };
60
-
61
- #ifndef ZSV_NO_PARALLEL
62
- static void *process_chunk_internal(struct zsv_chunk_count_data *cdata);
63
-
64
- static struct zsv_count_parallel_data *parallel_data_new(unsigned int count) {
65
- struct zsv_count_parallel_data *pd = calloc(1, sizeof(*pd));
66
- if (!pd)
67
- return NULL;
68
- pd->chunk_count = count;
69
- pd->chunks = calloc(count, sizeof(*pd->chunks));
70
- pd->threads = calloc(count, sizeof(*pd->threads));
71
- if (!pd->chunks || !pd->threads) {
72
- free(pd->chunks);
73
- free(pd->threads);
74
- free(pd);
75
- return NULL;
76
- }
77
- return pd;
78
- }
79
-
80
- static void parallel_data_delete(struct zsv_count_parallel_data *pd) {
81
- if (pd) {
82
- free(pd->chunks);
83
- free(pd->threads);
84
- free(pd);
85
- }
86
- }
87
-
88
- #endif
89
-
90
- /* serial (non-parallelized) row handlers */
91
- static void row_verbose(void *ctx) {
92
- struct data *data = ctx;
93
- data->rows++;
94
- if (data->rows % 1000000 == 0)
95
- fprintf(stderr, "Processed %zu data rows\n", data->rows / 1000000);
96
- }
97
-
98
- static void row_simple(void *ctx) {
99
- ((struct data *)ctx)->rows++;
100
- }
101
-
102
- #ifndef ZSV_NO_PARALLEL
103
- /* parallelized row handers */
104
- static void row_parallel_done(void *ctx) {
105
- struct data *data = ctx;
106
- // Find start of the next row
107
- data->next_row_start = zsv_cum_scanned_length(data->parser) - zsv_row_length_raw_bytes(data->parser);
108
- zsv_abort(data->parser);
109
- data->cancelled = 1;
110
- }
111
-
112
- static void row_parallel(void *ctx) {
113
- struct data *data = ctx;
114
- data->rows++;
115
-
116
- if (UNLIKELY((off_t)zsv_cum_scanned_length(data->parser) >= data->end_offset_limit)) {
117
- // We crossed the boundary. We must finish this row, then stop.
118
- // Switch handler to 'done' to catch the exact end of this row.
119
- zsv_set_row_handler(data->parser, row_parallel_done);
120
- }
121
- }
122
-
123
- struct worker_ctx {
124
- struct zsv_chunk_count_data *cdata;
125
- zsv_parser parser;
126
- size_t limit_len;
127
- int cancelled;
128
- };
129
-
130
- static void worker_row_done(void *ctx) {
131
- struct worker_ctx *wctx = ctx;
132
- // Calculate absolute offset of the *next* row start
133
- size_t scanned = zsv_cum_scanned_length(wctx->parser);
134
- wctx->cdata->actual_next_row_start = wctx->cdata->start_offset + scanned - zsv_row_length_raw_bytes(wctx->parser);
135
- zsv_abort(wctx->parser);
136
- wctx->cancelled = 1;
137
- }
138
-
139
- static void worker_row(void *ctx) {
140
- struct worker_ctx *wctx = ctx;
141
- wctx->cdata->row_count++;
142
-
143
- if (UNLIKELY((off_t)zsv_cum_scanned_length(wctx->parser) >= wctx->limit_len)) {
144
- zsv_set_row_handler(wctx->parser, worker_row_done);
145
- }
146
- }
147
-
148
- static void *process_chunk_thread(void *arg) {
149
- struct zsv_chunk_count_data *cdata = arg;
150
- return process_chunk_internal(cdata);
151
- }
152
-
153
- static void *process_chunk_internal(struct zsv_chunk_count_data *cdata) {
154
- cdata->row_count = 0;
155
- cdata->status = 0;
156
-
157
- if (cdata->start_offset >= cdata->end_offset) {
158
- cdata->actual_next_row_start = cdata->start_offset;
159
- cdata->skip = 1;
160
- return NULL;
161
- }
162
-
163
- struct zsv_opts opts = *cdata->opts_template;
164
- struct worker_ctx wctx = {0};
165
- wctx.cdata = cdata;
166
- wctx.limit_len = cdata->end_offset - cdata->start_offset;
167
-
168
- FILE *f = fopen(cdata->input_path, "rb");
169
- if (!f) {
170
- cdata->status = zsv_status_error;
171
- return NULL;
172
- }
173
-
174
- if (fseeko(f, cdata->start_offset, SEEK_SET) != 0) {
175
- fclose(f);
176
- cdata->status = zsv_status_error;
177
- return NULL;
178
- }
179
-
180
- opts.stream = f;
181
- opts.ctx = &wctx;
182
- opts.row_handler = worker_row;
183
-
184
- wctx.parser = zsv_new(&opts);
185
- if (wctx.parser == NULL) {
186
- fclose(f);
187
- cdata->status = zsv_status_error;
188
- return NULL;
189
- }
190
-
191
- enum zsv_status status = zsv_status_ok;
192
- while (status == zsv_status_ok && !wctx.cancelled) {
193
- status = zsv_parse_more(wctx.parser);
194
- }
195
-
196
- // if finished naturally (eof)
197
- if (!wctx.cancelled) {
198
- cdata->actual_next_row_start = cdata->start_offset + zsv_cum_scanned_length(wctx.parser);
199
- }
200
-
201
- zsv_finish(wctx.parser);
202
- zsv_delete(wctx.parser);
203
- fclose(f);
204
- return NULL;
205
- }
206
- #endif
207
-
208
- static void header_handler(void *ctx) {
209
- struct data *data = ctx;
210
- #ifndef ZSV_NO_PARALLEL
211
- if (data->input_path && data->num_chunks > 1) {
212
- size_t header_end = zsv_cum_scanned_length(data->parser);
213
- struct zsv_chunk_position *offsets =
214
- zsv_guess_file_chunks(data->input_path, data->num_chunks, ZSV_COUNT_PARALLEL_MIN_BYTES, header_end
215
- #ifndef ZSV_NO_ONLY_CRLF
216
- ,
217
- data->opts->only_crlf_rowend
218
- #endif
219
- );
220
-
221
- if (offsets) {
222
- data->pdata = parallel_data_new(data->num_chunks);
223
- if (!data->pdata) {
224
- fprintf(stderr, "Out of memory!\n");
225
- zsv_free_chunks(offsets);
226
- } else {
227
- data->run_in_parallel = 1;
228
- if (data->opts->verbose) {
229
- for (unsigned int i = 0; i < data->num_chunks; i++) {
230
- fprintf(stderr, "Chunk %i: %zu - %zu\n", i + 1, offsets[i].start, offsets[i].end);
231
- }
232
- }
233
-
234
- /* set up worker chunks (1..n) */
235
- for (unsigned int i = 1; i < data->num_chunks; i++) {
236
- struct zsv_chunk_count_data *c = &data->pdata->chunks[i];
237
- c->id = i;
238
- c->start_offset = offsets[i].start;
239
- c->end_offset = offsets[i].end;
240
- c->input_path = data->input_path;
241
- c->opts_template = data->opts;
242
-
243
- if (pthread_create(&data->pdata->threads[i], NULL, process_chunk_thread, c) != 0) {
244
- fprintf(stderr, "Error creating thread %d\n", i);
245
- data->run_in_parallel = 0;
246
- break;
247
- }
248
- }
249
-
250
- if (data->run_in_parallel) {
251
- data->end_offset_limit = offsets[0].end;
252
- zsv_set_row_handler(data->parser, row_parallel);
253
- data->run_in_parallel = 1;
254
- }
255
- }
256
- zsv_free_chunks(offsets);
257
- }
258
- }
259
- #endif
260
-
261
- if (!data->run_in_parallel) { // single-threaded serial run
262
- data->run_in_parallel = 0;
263
- zsv_set_row_handler(data->parser, data->opts->verbose ? row_verbose : row_simple);
264
- }
265
- }
266
-
267
- static int count_usage(void) {
268
- static const char *usage = "Usage: count [options]\n"
269
- "\n"
270
- "Options:\n"
271
- " -h,--help : show usage\n"
272
- " -i,--input <filename> : use specified file input\n"
273
- #ifndef ZSV_NO_PARALLEL
274
- " -j,--jobs <n> : number of jobs (parallel threads)\n"
275
- " --parallel : use all available cores\n"
276
- #endif
277
- ;
278
- printf("%s\n", usage);
279
- return 0;
280
- }
281
-
282
- int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *optsp,
283
- struct zsv_prop_handler *custom_prop_handler) {
284
- struct data data = {0};
285
- struct zsv_opts opts = *optsp;
286
- data.opts = &opts;
287
-
288
- int err = 0;
289
- for (int i = 1; !err && i < argc; i++) {
290
- const char *arg = argv[i];
291
- if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
292
- count_usage();
293
- goto count_done;
294
- }
295
- if (!strcmp(arg, "-i") || !strcmp(arg, "--input") || *arg != '-') {
296
- err = 1;
297
- if ((!strcmp(arg, "-i") || !strcmp(arg, "--input")) && ++i >= argc)
298
- fprintf(stderr, "%s option requires a filename\n", arg);
299
- else {
300
- if (opts.stream)
301
- fprintf(stderr, "Input may not be specified more than once\n");
302
- else if (!(opts.stream = fopen(argv[i], "rb")))
303
- fprintf(stderr, "Unable to open for reading: %s\n", argv[i]);
304
- else {
305
- data.input_path = argv[i];
306
- err = 0;
307
- }
308
- }
309
- #ifndef ZSV_NO_PARALLEL
310
- } else if (!strcmp(arg, "-j") || !strcmp(arg, "--jobs")) {
311
- if (++i >= argc)
312
- err = 1;
313
- else
314
- data.num_chunks = atoi(argv[i]);
315
- } else if (!strcmp(arg, "--parallel")) {
316
- data.num_chunks = zsv_get_number_of_cores();
317
- if (data.num_chunks < 2) {
318
- fprintf(stderr, "Warning: --parallel specified but only one core found; using -j 4 instead");
319
- data.num_chunks = 4;
320
- }
321
- #endif
322
- } else {
323
- fprintf(stderr, "Unrecognized option: %s\n", arg);
324
- err = 1;
325
- }
326
- }
327
-
328
- #ifdef NO_STDIN
329
- if (!opts.stream || opts.stream == stdin) {
330
- fprintf(stderr, "Please specify an input file\n");
331
- err = 1;
332
- }
333
- #endif
334
- #ifndef ZSV_NO_PARALLEL
335
- if (data.num_chunks > 1) {
336
- enum zsv_chunk_status chstat = zsv_chunkable(data.input_path, &opts);
337
- if (chstat != zsv_chunk_status_ok) {
338
- fprintf(stderr, "%s\n", zsv_chunk_status_str(chstat));
339
- err = 1;
340
- }
341
- }
342
- #endif
343
- if (!err) {
344
- opts.row_handler = header_handler;
345
- opts.ctx = &data;
346
-
347
- if (zsv_new_with_properties(&opts, custom_prop_handler, data.input_path, &data.parser) != zsv_status_ok) {
348
- fprintf(stderr, "Unable to initialize parser\n");
349
- err = 1;
350
- } else {
351
- enum zsv_status status;
352
-
353
- /* Main Parse Loop */
354
- while (!data.cancelled && (status = zsv_parse_more(data.parser)) == zsv_status_ok)
355
- ;
356
- zsv_finish(data.parser);
357
-
358
- #ifndef ZSV_NO_PARALLEL
359
- if (data.run_in_parallel) {
360
- if (!data.next_row_start)
361
- // not likely to get here but just in case
362
- data.next_row_start = zsv_cum_scanned_length(data.parser);
363
-
364
- size_t total_rows = data.rows;
365
- // aggregate results
366
- for (unsigned int i = 1; i < data.num_chunks; i++) {
367
- pthread_join(data.pdata->threads[i], NULL);
368
-
369
- struct zsv_chunk_count_data *prev_chunk = (i == 1) ? NULL : &data.pdata->chunks[i - 1];
370
- struct zsv_chunk_count_data *curr_chunk = &data.pdata->chunks[i];
371
-
372
- // determine where the previous chunk actually ended
373
- size_t prev_end = (i == 1) ? data.next_row_start : prev_chunk->actual_next_row_start;
374
- // check overlap
375
- if (prev_end > curr_chunk->start_offset) {
376
- if (data.opts->verbose) {
377
- fprintf(stderr, "Overlap detected at chunk %u (expected %zu, got %zu). Reprocessing.\n", i,
378
- curr_chunk->start_offset, prev_end);
379
- }
380
- // reprocess synchronously
381
- curr_chunk->start_offset = prev_end;
382
- process_chunk_internal(curr_chunk);
383
- }
384
-
385
- total_rows += curr_chunk->row_count;
386
- }
387
-
388
- printf("%zu\n", total_rows);
389
- parallel_data_delete(data.pdata);
390
-
391
- } else
392
- #endif
393
- // result from running serially
394
- printf("%zu\n", data.rows);
395
- zsv_delete(data.parser);
396
- }
397
- }
398
-
399
- count_done:
400
- if (opts.stream && opts.stream != stdin)
401
- fclose(opts.stream);
402
-
403
- return err;
404
- }