zsv 1.3.0 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/README.md +41 -3
  4. data/ext/zsv/extconf.rb +66 -49
  5. data/ext/zsv/options.c +1 -2
  6. data/ext/zsv/parser.c +14 -0
  7. data/ext/zsv/zsv_ext.c +3 -0
  8. data/lib/zsv/version.rb +1 -1
  9. metadata +6 -226
  10. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +0 -756
  11. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +0 -381
  12. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +0 -228
  13. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +0 -123
  14. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +0 -39
  15. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +0 -104
  16. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +0 -41
  17. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +0 -1
  18. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +0 -14
  19. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +0 -19
  20. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +0 -116
  21. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +0 -194
  22. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +0 -796
  23. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +0 -41
  24. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +0 -16
  25. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +0 -280
  26. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +0 -36
  27. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +0 -913
  28. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +0 -23
  29. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +0 -20
  30. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +0 -140
  31. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +0 -91
  32. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +0 -81
  33. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +0 -82
  34. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +0 -404
  35. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +0 -569
  36. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +0 -365
  37. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +0 -366
  38. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +0 -341
  39. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +0 -263
  40. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +0 -298
  41. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +0 -157
  42. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +0 -177
  43. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +0 -444
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +0 -145
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +0 -110
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +0 -15
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +0 -64
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +0 -1955
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +0 -6802
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +0 -230517
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +0 -12174
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +0 -2
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +0 -142
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +0 -49
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +0 -485
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +0 -1015
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +0 -663
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +0 -85
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +0 -75
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +0 -167
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +0 -228
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +0 -186
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +0 -23
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +0 -76
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +0 -167
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +0 -238
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +0 -186
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +0 -184
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +0 -52
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +0 -34
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +0 -103
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +0 -57
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +0 -69
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +0 -220
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +0 -34
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +0 -362
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +0 -764
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +0 -117
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +0 -508
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +0 -78
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +0 -505
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +0 -7
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +0 -59
  84. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +0 -208
  85. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +0 -795
  86. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +0 -28
  87. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +0 -851
  88. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +0 -106
  89. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +0 -6
  90. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +0 -113
  91. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +0 -90
  92. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +0 -295
  93. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +0 -175
  94. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +0 -693
  95. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +0 -980
  96. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +0 -131
  97. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +0 -130
  98. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +0 -118
  99. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +0 -45
  100. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +0 -41
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +0 -107
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +0 -20
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +0 -61
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +0 -14
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +0 -192
  106. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +0 -72
  107. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +0 -812
  108. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +0 -753
  109. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +0 -372
  110. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +0 -15
  111. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +0 -119
  112. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +0 -45
  113. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +0 -63
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +0 -12
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +0 -166
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +0 -214
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +0 -128
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +0 -43
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +0 -81
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +0 -25
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +0 -325
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +0 -73
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +0 -203
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +0 -7
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +0 -318
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +0 -134
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +0 -119
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +0 -322
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +0 -203
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +0 -36
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +0 -167
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +0 -36
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +0 -153
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +0 -32
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +0 -312
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +0 -29
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +0 -266
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +0 -9
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +0 -60
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +0 -1007
  141. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +0 -453
  142. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +0 -101
  143. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +0 -49
  144. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +0 -393
  145. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +0 -322
  146. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +0 -228
  147. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +0 -91
  148. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +0 -240
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +0 -63
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +0 -57
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +0 -148
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +0 -2
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +0 -427
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +0 -253
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +0 -121
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +0 -20
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +0 -159
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +0 -24
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +0 -180
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +0 -256
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +0 -197
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +0 -49
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +0 -400
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +0 -120
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +0 -18
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +0 -132
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +0 -178
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +0 -258
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +0 -246
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +0 -123
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +0 -153
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +0 -54
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +0 -267
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +0 -53
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +0 -357
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +0 -83
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +0 -33
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +0 -184
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +0 -292
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +0 -259
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +0 -13
  182. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +0 -255
  183. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +0 -96
  184. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +0 -361
  185. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +0 -40
  186. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +0 -16
  187. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +0 -44
  188. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +0 -3
  189. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +0 -100
  190. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +0 -143
  191. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +0 -89
  192. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +0 -123
  193. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +0 -16
  194. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +0 -336
  195. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +0 -361
  196. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +0 -62
  197. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +0 -113
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +0 -73
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +0 -329
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +0 -90
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +0 -49
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +0 -36
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +0 -58
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +0 -19
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +0 -147
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +0 -22
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +0 -28
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +0 -22
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +0 -17
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +0 -99
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +0 -65
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +0 -19
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +0 -19
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +0 -13
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +0 -54
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +0 -71
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +0 -53
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +0 -107
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +0 -18
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +0 -11
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +0 -148
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +0 -41
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +0 -25
  224. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +0 -101
  225. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +0 -33
  226. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +0 -20
  227. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +0 -60
  228. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +0 -484
  229. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +0 -731
  230. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +0 -285
  231. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +0 -88
  232. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +0 -51
@@ -1,569 +0,0 @@
1
- /*
2
- * Copyright (C) 2021 Liquidaty and the zsv/lib contributors
3
- * All rights reserved
4
- *
5
- * This file is part of zsv/lib, distributed under the license defined at
6
- * https://opensource.org/licenses/MIT
7
- */
8
-
9
- #include <sglib.h>
10
-
11
- #include <stdio.h>
12
- #include <stdlib.h>
13
- #include <string.h>
14
- #include <math.h>
15
- #include <fenv.h>
16
- #include <time.h>
17
- #include <unistd.h> // unlink()
18
-
19
- #define ZSV_COMMAND desc
20
- #include "zsv_command.h"
21
-
22
- #include <zsv/utils/writer.h>
23
- #include <zsv/utils/file.h>
24
- #include <zsv/utils/mem.h>
25
- #include <zsv/utils/string.h>
26
-
27
- #define ZSV_DESC_MAX_COLS_DEFAULT 32768
28
- #define ZSV_DESC_MAX_COLS_DEFAULT_S "32768"
29
-
30
- #define ZSV_DESC_FLAG_MINMAX 1
31
- #define ZSV_DESC_FLAG_MINMAXLEN 2
32
- #define ZSV_DESC_FLAG_UNIQUE 32
33
- #define ZSV_DESC_FLAG_UNIQUE_CI 64
34
-
35
- struct zsv_desc_string_list {
36
- struct zsv_desc_string_list *next;
37
- unsigned char *value;
38
- size_t count;
39
- };
40
-
41
- void zsv_desc_string_list_free(struct zsv_desc_string_list *e) {
42
- struct zsv_desc_string_list *n;
43
- for (; e; e = n) {
44
- n = e->next;
45
- if (e->value)
46
- free(e->value);
47
- free(e);
48
- }
49
- }
50
-
51
- typedef struct zsv_desc_unique_key {
52
- unsigned char color : 1;
53
- unsigned char _ : 7;
54
- unsigned char *value;
55
- struct zsv_desc_unique_key *left;
56
- struct zsv_desc_unique_key *right;
57
- } zsv_desc_unique_key;
58
-
59
- struct zsv_desc_unique_key_container {
60
- struct zsv_desc_unique_key *key;
61
- size_t max_count;
62
- size_t count;
63
- unsigned char not_enum : 1;
64
- unsigned char dummy : 7;
65
- };
66
-
67
- static struct zsv_desc_unique_key *zsv_desc_unique_key_new(const unsigned char *value, size_t len) {
68
- zsv_desc_unique_key *key = calloc(1, sizeof(*key));
69
- if (!key || !(key->value = malloc(len + 1)))
70
- ; // handle out-of-memory error!
71
- else {
72
- memcpy(key->value, value, len);
73
- key->value[len] = '\0';
74
- }
75
- return key;
76
- }
77
-
78
- static void zsv_desc_unique_key_delete(zsv_desc_unique_key *e) {
79
- if (e)
80
- free(e->value);
81
- free(e);
82
- }
83
-
84
- static int zsv_desc_unique_key_cmp(zsv_desc_unique_key *x, zsv_desc_unique_key *y) {
85
- return strcmp((const char *)x->value, (const char *)y->value);
86
- }
87
-
88
- SGLIB_DEFINE_RBTREE_PROTOTYPES(zsv_desc_unique_key, left, right, color, zsv_desc_unique_key_cmp);
89
- SGLIB_DEFINE_RBTREE_FUNCTIONS(zsv_desc_unique_key, left, right, color, zsv_desc_unique_key_cmp);
90
-
91
- #define ZSV_DESC_MAX_EXAMPLE_COUNT 5 // could make this customizable...
92
- struct zsv_desc_column_data {
93
- char *name;
94
- unsigned int position;
95
-
96
- unsigned char not_unique : 1;
97
- unsigned char not_unique_ci : 1;
98
- unsigned char _ : 6;
99
-
100
- struct zsv_desc_unique_key_container unique_values;
101
- struct zsv_desc_unique_key_container unique_values_ci;
102
- struct zsv_desc_string_list *examples;
103
- struct zsv_desc_string_list **examples_tail;
104
- unsigned int examples_count;
105
-
106
- unsigned int total_count;
107
- struct {
108
- unsigned int count;
109
- } mblank;
110
-
111
- struct {
112
- size_t lo;
113
- size_t hi;
114
- } lengths;
115
- };
116
-
117
- static void zsv_desc_column_data_finalize(struct zsv_desc_column_data *col, unsigned int i) {
118
- col->position = i;
119
- }
120
-
121
- static void zsv_desc_column_unique_values_delete(zsv_desc_unique_key **tree) {
122
- if (tree && *tree) {
123
- struct sglib_zsv_desc_unique_key_iterator it;
124
- struct zsv_desc_unique_key *e;
125
- for (e = sglib_zsv_desc_unique_key_it_init(&it, *tree); e; e = sglib_zsv_desc_unique_key_it_next(&it))
126
- zsv_desc_unique_key_delete(e);
127
- *tree = NULL;
128
- }
129
- }
130
-
131
- static void zsv_desc_column_data_free(struct zsv_desc_column_data *e) {
132
- free(e->name);
133
- zsv_desc_column_unique_values_delete(&e->unique_values.key);
134
- zsv_desc_column_unique_values_delete(&e->unique_values_ci.key);
135
- zsv_desc_string_list_free(e->examples);
136
- }
137
-
138
- struct zsv_desc_column_name {
139
- struct zsv_desc_column_name *next;
140
- char *name;
141
- };
142
-
143
- static void zsv_desc_column_names_delete(struct zsv_desc_column_name **p) {
144
- if (p && *p) {
145
- struct zsv_desc_column_name *next;
146
- for (struct zsv_desc_column_name *e = *p; e; e = next) {
147
- next = e->next;
148
- free(e->name);
149
- free(e);
150
- }
151
- *p = NULL;
152
- }
153
- }
154
-
155
- enum zsv_desc_status {
156
- zsv_desc_status_ok = 0,
157
- zsv_desc_status_error, // generic error
158
- zsv_desc_status_memory,
159
- zsv_desc_status_file,
160
- zsv_desc_status_argument
161
- };
162
-
163
- struct zsv_desc_data {
164
- struct zsv_opts *opts;
165
- const char *input_filename;
166
- zsv_csv_writer csv_writer;
167
-
168
- char header_only;
169
- char *filename;
170
-
171
- void (*header_func)(void *ctx, unsigned int col_ix, const char *name); // api use only
172
- void *header_func_arg;
173
- unsigned int errcount;
174
-
175
- unsigned int max_cols;
176
- unsigned int current_column_ix;
177
-
178
- struct zsv_desc_column_name *column_names;
179
- struct zsv_desc_column_name **column_names_tail;
180
-
181
- unsigned int col_count;
182
- struct zsv_desc_column_data *columns;
183
-
184
- #define ZSV_DESC_MAX_ENUM_DEFAULT 100
185
- size_t max_enum;
186
- size_t row_count;
187
-
188
- char *err_msg;
189
- enum zsv_desc_status err;
190
-
191
- zsv_parser parser;
192
-
193
- unsigned char flags; // see ZSV_DESC_FLAG_XXX
194
- unsigned char done;
195
-
196
- size_t max_row_size;
197
-
198
- char *overflowed;
199
- size_t overflow_count;
200
-
201
- unsigned char quick : 1;
202
- unsigned char _ : 7;
203
- };
204
-
205
- static void zsv_desc_finalize(struct zsv_desc_data *data) {
206
- for (unsigned int i = 0; i < data->col_count; i++)
207
- zsv_desc_column_data_finalize(&data->columns[i], i);
208
- }
209
-
210
- static void write_headers(struct zsv_desc_data *data) {
211
- // TO DO: adjust header for ZSV_DESC_FLAG options
212
- const char *headers1[] = {"#", "Column name", "Min Length", "Max Length", NULL};
213
- const char *headers2[] = {"Count", "Blank %", "Example 1", "Example 2", "Example 3", "Example 4", "Example 5", NULL};
214
- for (int i = 0; headers1[i]; i++)
215
- zsv_writer_cell(data->csv_writer, i == 0, (const unsigned char *)headers1[i], strlen(headers1[i]), 1);
216
-
217
- if (data->flags & ZSV_DESC_FLAG_UNIQUE)
218
- zsv_writer_cell_s(data->csv_writer, 0, (const unsigned char *)"Unique", 0);
219
-
220
- if (data->flags & ZSV_DESC_FLAG_UNIQUE_CI)
221
- zsv_writer_cell_s(data->csv_writer, 0, (const unsigned char *)"Unique (case-insensitive)", 0);
222
-
223
- for (int i = 0; headers2[i]; i++)
224
- zsv_writer_cell(data->csv_writer, 0, (const unsigned char *)headers2[i], strlen(headers2[i]), 1);
225
- }
226
-
227
- static void zsv_desc_print(struct zsv_desc_data *data) {
228
- if (data->header_only) {
229
- for (unsigned int i = 0; i < data->col_count; i++) {
230
- struct zsv_desc_column_data *c = &data->columns[i];
231
- zsv_writer_cell(data->csv_writer, 1, (const unsigned char *)c->name, c->name ? strlen(c->name) : 0, 1);
232
- }
233
- } else {
234
- write_headers(data);
235
- for (unsigned int i = 0; i < data->col_count; i++) {
236
- struct zsv_desc_column_data *c = &data->columns[i];
237
- zsv_writer_cell_zu(data->csv_writer, 1, i + 1);
238
- zsv_writer_cell_s(data->csv_writer, 0, (unsigned char *)c->name, 1);
239
- if (c->lengths.lo) {
240
- zsv_writer_cell_zu(data->csv_writer, 0, c->lengths.lo);
241
- zsv_writer_cell_zu(data->csv_writer, 0, c->lengths.hi);
242
- } else {
243
- zsv_writer_cell(data->csv_writer, 0, NULL, 0, 0);
244
- zsv_writer_cell(data->csv_writer, 0, NULL, 0, 0);
245
- }
246
-
247
- // unique
248
- if (data->flags & ZSV_DESC_FLAG_UNIQUE) {
249
- const char *s = c->not_unique ? "FALSE" : "TRUE";
250
- zsv_writer_cell_s(data->csv_writer, 0, (const unsigned char *)s, 0);
251
- }
252
-
253
- // unique_ci
254
- if (data->flags & ZSV_DESC_FLAG_UNIQUE_CI) {
255
- const char *s = c->not_unique_ci ? "FALSE" : "TRUE";
256
- zsv_writer_cell_s(data->csv_writer, 0, (const unsigned char *)s, 0);
257
- }
258
-
259
- // count, blank %
260
- zsv_writer_cell_zu(data->csv_writer, 0, c->total_count);
261
- zsv_writer_cell_Lf(data->csv_writer, 0, ".2",
262
- ((long double)c->mblank.count) / (long double)(c->total_count) * (long double)100);
263
-
264
- for (struct zsv_desc_string_list *sl = c->examples; sl; sl = sl->next) {
265
- if (sl->count) {
266
- char *tmp;
267
- asprintf(&tmp, "%s (%zu)", sl->value, sl->count + 1);
268
- zsv_writer_cell_s(data->csv_writer, 0, (unsigned char *)tmp, 1);
269
- free(tmp);
270
- } else
271
- zsv_writer_cell_s(data->csv_writer, 0, sl->value, 1);
272
- }
273
- }
274
- }
275
- }
276
-
277
- static void zsv_desc_set_err(struct zsv_desc_data *data, enum zsv_desc_status err, char *msg) {
278
- data->err = err;
279
- if (msg) {
280
- if (data->err_msg)
281
- free(msg);
282
- else
283
- data->err_msg = msg;
284
- }
285
- }
286
-
287
- // zsv_desc_column_update_unique(): return 1 if unique, 0 if dupe
288
- static int zsv_desc_column_update_unique(struct zsv_desc_unique_key_container *key_container,
289
- const unsigned char *utf8_value, size_t len) {
290
- zsv_desc_unique_key *key = zsv_desc_unique_key_new(utf8_value, len);
291
- if (sglib_zsv_desc_unique_key_find_member(key_container->key, key)) { // not unique
292
- if (key_container->count > key_container->max_count) {
293
- zsv_desc_column_unique_values_delete(&key_container->key);
294
- key_container->not_enum = 1;
295
- }
296
- zsv_desc_unique_key_delete(key);
297
- return 0;
298
- } else {
299
- sglib_zsv_desc_unique_key_add(&key_container->key, key);
300
- key_container->count++;
301
- return 1;
302
- }
303
- }
304
-
305
- static void zsv_desc_cell(void *ctx, unsigned char *restrict utf8_value, size_t len) {
306
- struct zsv_desc_data *data = ctx;
307
- if (!data || data->err || data->done)
308
- return;
309
-
310
- // trim the cell values, so we don't count e.g. " abc" as different from "abc"
311
- utf8_value = (unsigned char *)zsv_strtrim(utf8_value, &len);
312
- if (data->row_count == 0) {
313
- if (data->current_column_ix < data->max_cols) {
314
- struct zsv_desc_column_name *e = calloc(1, sizeof(*e));
315
- if (!e) {
316
- zsv_desc_set_err(data, zsv_desc_status_memory, NULL);
317
- return;
318
- }
319
-
320
- if (len)
321
- e->name = zsv_memdup(utf8_value, len);
322
- *data->column_names_tail = e;
323
- data->column_names_tail = &e->next;
324
- }
325
- } else {
326
- if (data->current_column_ix < data->col_count) {
327
- struct zsv_desc_column_data *col = &data->columns[data->current_column_ix];
328
- if (col) {
329
- col->total_count++;
330
- if (!len)
331
- col->mblank.count++;
332
- else {
333
- if (col->lengths.lo == 0 || len < col->lengths.lo)
334
- col->lengths.lo = len;
335
- if (len > col->lengths.hi)
336
- col->lengths.hi = len;
337
- if (col->examples_count < ZSV_DESC_MAX_EXAMPLE_COUNT || !data->quick) {
338
- char already_have = 0;
339
- if (!col->examples_tail)
340
- col->examples_tail = &col->examples;
341
- for (struct zsv_desc_string_list *sl = col->examples; !already_have && sl; sl = sl->next) {
342
- if (sl->value && !zsv_strincmp(utf8_value, len, sl->value, strlen((char *)sl->value))) {
343
- already_have = 1;
344
- sl->count++;
345
- }
346
- }
347
- if (!already_have && col->examples_count < ZSV_DESC_MAX_EXAMPLE_COUNT) {
348
- struct zsv_desc_string_list *sl;
349
- if ((sl = *col->examples_tail = calloc(1, sizeof(*sl)))) {
350
- col->examples_tail = &sl->next;
351
- sl->value = zsv_memdup(utf8_value, len);
352
- col->examples_count++;
353
- }
354
- }
355
- }
356
-
357
- if (data->flags & ZSV_DESC_FLAG_UNIQUE) {
358
- if (!col->not_unique)
359
- if (!zsv_desc_column_update_unique(&col->unique_values, utf8_value, len)) // dupe
360
- col->not_unique = 1;
361
- }
362
-
363
- if (data->flags & ZSV_DESC_FLAG_UNIQUE_CI) {
364
- if (!col->not_unique_ci || !col->unique_values_ci.not_enum
365
- // )
366
- ) {
367
- unsigned char *lc = zsv_strtolowercase(utf8_value, &len);
368
- if (lc) {
369
- if (!zsv_desc_column_update_unique(&col->unique_values_ci, lc, len))
370
- col->not_unique_ci = 1;
371
- free(lc);
372
- }
373
- }
374
- }
375
- }
376
- }
377
- }
378
- }
379
- data->current_column_ix++;
380
- }
381
-
382
- static void zsv_desc_row(void *ctx) {
383
- struct zsv_desc_data *data = ctx;
384
-
385
- if (!data || data->err)
386
- return;
387
-
388
- if (data->row_count == 0) {
389
- if (data->current_column_ix < data->max_cols)
390
- data->col_count = data->current_column_ix;
391
- else
392
- data->current_column_ix = data->max_cols;
393
- if (!(data->columns = calloc(data->col_count, sizeof(*data->columns)))) {
394
- zsv_desc_set_err(data, zsv_desc_status_memory, NULL);
395
- return;
396
- }
397
-
398
- struct zsv_desc_column_name *cn = data->column_names;
399
- for (unsigned int i = 0; i < data->col_count && cn; cn = cn->next, i++) {
400
- struct zsv_desc_column_data *col = &data->columns[i];
401
- if (cn->name && *cn->name) {
402
- col->name = cn->name;
403
- cn->name = NULL;
404
- }
405
- col->unique_values_ci.max_count = data->max_enum;
406
- }
407
-
408
- if (data->header_only) {
409
- data->done = 1;
410
- zsv_abort(data->parser);
411
- }
412
- } else {
413
- if (data->row_count % 50000 == 0 && data->opts->verbose)
414
- fprintf(stderr, "%zu rows read\n", data->row_count);
415
- }
416
-
417
- data->current_column_ix = 0;
418
- ++data->row_count;
419
- }
420
-
421
- const char *zsv_desc_usage_msg[] = {
422
- APPNAME ": get column-level information about a table's content",
423
- "",
424
- "Usage: " APPNAME " [options] <filename>",
425
- "",
426
- "Options:",
427
- " -b,--with-bom : output with BOM",
428
- " -C <max_num_of_columns> : maximum number of columns (default: " ZSV_DESC_MAX_COLS_DEFAULT_S ")",
429
- " -H : output header names only",
430
- " -q,--quick : minimize example counts",
431
- " -a,--all : calculate all metadata (for now, this only adds uniqueness info)",
432
- " -o <filename> : filename to save output to (default: stdout)",
433
- NULL,
434
- };
435
-
436
- static int zsv_desc_usage(void) {
437
- for (size_t i = 0; zsv_desc_usage_msg[i]; i++)
438
- fprintf(stdout, "%s\n", zsv_desc_usage_msg[i]);
439
- return 0;
440
- }
441
-
442
- static void zsv_desc_cleanup(struct zsv_desc_data *data) {
443
- if (data->columns) {
444
- for (unsigned int i = 0; i < data->col_count; i++)
445
- zsv_desc_column_data_free(&data->columns[i]);
446
- free(data->columns);
447
- data->columns = NULL;
448
- }
449
-
450
- zsv_desc_column_names_delete(&data->column_names);
451
- free(data->err_msg);
452
- data->err_msg = NULL;
453
-
454
- if (data->opts->stream && data->opts->stream != stdin) {
455
- fclose(data->opts->stream);
456
- data->opts->stream = NULL;
457
- }
458
-
459
- if (data->overflowed) {
460
- fprintf(stderr, "Warning: data overflowed %zu times (example: %s)\n", data->overflow_count, data->overflowed);
461
- free(data->overflowed);
462
- }
463
- zsv_writer_delete(data->csv_writer);
464
- }
465
-
466
- #define ZSV_DESC_TMPFN_TEMPLATE "zsv_desc_XXXXXXXXXXXX"
467
-
468
- static void zsv_desc_execute(struct zsv_desc_data *data, struct zsv_prop_handler *custom_prop_handler,
469
- const char *input_path) {
470
- data->opts->cell_handler = zsv_desc_cell;
471
- data->opts->row_handler = zsv_desc_row;
472
- data->opts->ctx = data;
473
-
474
- if (!data->max_enum)
475
- data->max_enum = ZSV_DESC_MAX_ENUM_DEFAULT;
476
- if (zsv_new_with_properties(data->opts, custom_prop_handler, input_path, &data->parser) == zsv_status_ok) {
477
- FILE *input_temp_file = NULL;
478
- enum zsv_status status;
479
- if (input_temp_file)
480
- zsv_set_scan_filter(data->parser, zsv_filter_write, input_temp_file);
481
- while (!zsv_signal_interrupted && (status = zsv_parse_more(data->parser)) == zsv_status_ok)
482
- ;
483
-
484
- if (input_temp_file)
485
- fclose(input_temp_file);
486
- zsv_finish(data->parser);
487
- zsv_delete(data->parser);
488
- }
489
- }
490
-
491
- int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts,
492
- struct zsv_prop_handler *custom_prop_handler) {
493
- if (argc < 1)
494
- zsv_desc_usage();
495
- else if (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")))
496
- zsv_desc_usage();
497
- else {
498
- struct zsv_desc_data data = {0};
499
- const char *input_path = NULL;
500
- int err = 0;
501
- if (opts->malformed_utf8_replace != ZSV_MALFORMED_UTF8_DO_NOT_REPLACE) // user specified to be 'none'
502
- opts->malformed_utf8_replace = '?';
503
-
504
- data.opts = opts;
505
- data.max_cols = ZSV_DESC_MAX_COLS_DEFAULT; // default
506
- data.column_names_tail = &data.column_names;
507
-
508
- struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
509
-
510
- for (int arg_i = 1; !err && !data.err && arg_i < argc; arg_i++) {
511
- if (!strcmp(argv[arg_i], "-b") || !strcmp(argv[arg_i], "--with-bom"))
512
- writer_opts.with_bom = 1;
513
- else if (!strcmp(argv[arg_i], "-o") || !strcmp(argv[arg_i], "--output"))
514
- writer_opts.output_path = zsv_next_arg(++arg_i, argc, argv, (int *)&data.err);
515
- else if (!strcmp(argv[arg_i], "-a") || !strcmp(argv[arg_i], "--all"))
516
- data.flags = 0xff;
517
- else if (!strcmp(argv[arg_i], "-q") || !strcmp(argv[arg_i], "--quick"))
518
- data.quick = 1;
519
- else if (!strcmp(argv[arg_i], "-H"))
520
- data.header_only = 1;
521
- else if (!strcmp(argv[arg_i], "-C")) {
522
- arg_i++;
523
- if (!(arg_i < argc && atoi(argv[arg_i]) > 9))
524
- data.err = zsv_printerr(zsv_desc_status_error,
525
- "-C (max cols) invalid: should be positive integer > 9 (got %s)", argv[arg_i]);
526
- else
527
- data.max_cols = atoi(argv[arg_i]);
528
- } else {
529
- if (data.opts->stream) {
530
- err = 1;
531
- fprintf(stderr, "Input file specified twice, or unrecognized argument: %s\n", argv[arg_i]);
532
- } else if (!(data.opts->stream = fopen(argv[arg_i], "rb"))) {
533
- err = 1;
534
- fprintf(stderr, "Could not open for reading: %s\n", argv[arg_i]);
535
- } else
536
- input_path = argv[arg_i];
537
-
538
- if (data.opts->stream && data.opts->stream != stdin)
539
- data.input_filename = argv[arg_i];
540
- if (err)
541
- data.err = err;
542
- }
543
- }
544
-
545
- zsv_handle_ctrl_c_signal();
546
-
547
- if (!data.err && !(data.csv_writer = zsv_writer_new(&writer_opts)))
548
- data.err = zsv_printerr(zsv_desc_status_error, "Unable to create csv writer");
549
-
550
- if (!data.opts->stream) {
551
- #ifdef NO_STDIN
552
- data.err = zsv_printerr(zsv_desc_status_error, "Please specify an input file");
553
- #else
554
- data.opts->stream = stdin;
555
- #endif
556
- }
557
-
558
- if (data.err) {
559
- zsv_desc_cleanup(&data);
560
- return 1;
561
- }
562
-
563
- zsv_desc_execute(&data, custom_prop_handler, input_path);
564
- zsv_desc_finalize(&data);
565
- zsv_desc_print(&data);
566
- zsv_desc_cleanup(&data);
567
- }
568
- return 0;
569
- }