zsv 1.3.1 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/README.md +41 -3
  4. data/ext/zsv/extconf.rb +1 -1
  5. data/lib/zsv/version.rb +1 -1
  6. metadata +6 -226
  7. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +0 -756
  8. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +0 -381
  9. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +0 -228
  10. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +0 -123
  11. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +0 -39
  12. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +0 -104
  13. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +0 -41
  14. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +0 -1
  15. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +0 -14
  16. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +0 -19
  17. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +0 -116
  18. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +0 -194
  19. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +0 -796
  20. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +0 -41
  21. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +0 -16
  22. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +0 -280
  23. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +0 -36
  24. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +0 -913
  25. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +0 -23
  26. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +0 -20
  27. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +0 -140
  28. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +0 -91
  29. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +0 -81
  30. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +0 -82
  31. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +0 -404
  32. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +0 -569
  33. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +0 -365
  34. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +0 -366
  35. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +0 -341
  36. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +0 -263
  37. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +0 -298
  38. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +0 -157
  39. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +0 -177
  40. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +0 -444
  41. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +0 -145
  42. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +0 -110
  43. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +0 -15
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +0 -64
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +0 -1955
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +0 -6802
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +0 -230517
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +0 -12174
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +0 -2
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +0 -142
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +0 -49
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +0 -485
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +0 -1015
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +0 -663
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +0 -85
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +0 -75
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +0 -167
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +0 -228
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +0 -186
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +0 -23
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +0 -76
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +0 -167
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +0 -238
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +0 -186
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +0 -184
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +0 -52
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +0 -34
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +0 -103
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +0 -57
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +0 -69
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +0 -220
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +0 -34
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +0 -362
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +0 -764
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +0 -117
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +0 -508
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +0 -78
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +0 -505
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +0 -7
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +0 -59
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +0 -208
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +0 -795
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +0 -28
  84. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +0 -851
  85. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +0 -106
  86. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +0 -6
  87. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +0 -113
  88. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +0 -90
  89. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +0 -295
  90. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +0 -175
  91. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +0 -693
  92. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +0 -980
  93. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +0 -131
  94. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +0 -130
  95. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +0 -118
  96. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +0 -45
  97. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +0 -41
  98. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +0 -107
  99. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +0 -20
  100. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +0 -61
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +0 -14
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +0 -192
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +0 -72
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +0 -812
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +0 -753
  106. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +0 -372
  107. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +0 -15
  108. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +0 -119
  109. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +0 -45
  110. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +0 -63
  111. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +0 -12
  112. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +0 -166
  113. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +0 -214
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +0 -128
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +0 -43
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +0 -81
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +0 -25
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +0 -325
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +0 -73
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +0 -203
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +0 -7
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +0 -318
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +0 -134
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +0 -119
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +0 -322
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +0 -203
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +0 -36
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +0 -167
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +0 -36
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +0 -153
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +0 -32
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +0 -312
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +0 -29
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +0 -266
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +0 -9
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +0 -60
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +0 -1007
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +0 -453
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +0 -101
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +0 -49
  141. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +0 -393
  142. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +0 -322
  143. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +0 -228
  144. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +0 -91
  145. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +0 -240
  146. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +0 -63
  147. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +0 -57
  148. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +0 -148
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +0 -2
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +0 -427
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +0 -253
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +0 -121
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +0 -20
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +0 -159
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +0 -24
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +0 -180
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +0 -256
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +0 -197
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +0 -49
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +0 -400
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +0 -120
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +0 -18
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +0 -132
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +0 -178
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +0 -258
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +0 -246
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +0 -123
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +0 -153
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +0 -54
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +0 -267
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +0 -53
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +0 -357
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +0 -83
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +0 -33
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +0 -184
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +0 -292
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +0 -259
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +0 -13
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +0 -255
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +0 -96
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +0 -361
  182. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +0 -40
  183. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +0 -16
  184. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +0 -44
  185. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +0 -3
  186. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +0 -100
  187. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +0 -143
  188. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +0 -89
  189. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +0 -123
  190. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +0 -16
  191. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +0 -336
  192. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +0 -361
  193. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +0 -62
  194. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +0 -113
  195. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +0 -73
  196. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +0 -329
  197. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +0 -90
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +0 -49
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +0 -36
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +0 -58
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +0 -19
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +0 -147
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +0 -22
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +0 -28
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +0 -22
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +0 -17
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +0 -99
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +0 -65
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +0 -19
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +0 -19
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +0 -13
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +0 -54
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +0 -71
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +0 -53
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +0 -107
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +0 -18
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +0 -11
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +0 -148
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +0 -41
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +0 -25
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +0 -101
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +0 -33
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +0 -20
  224. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +0 -60
  225. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +0 -484
  226. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +0 -731
  227. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +0 -285
  228. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +0 -88
  229. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +0 -51
@@ -1,569 +0,0 @@
1
- /*
2
- * Copyright (C) 2021 Liquidaty and the zsv/lib contributors
3
- * All rights reserved
4
- *
5
- * This file is part of zsv/lib, distributed under the license defined at
6
- * https://opensource.org/licenses/MIT
7
- */
8
-
9
- #include <sglib.h>
10
-
11
- #include <stdio.h>
12
- #include <stdlib.h>
13
- #include <string.h>
14
- #include <math.h>
15
- #include <fenv.h>
16
- #include <time.h>
17
- #include <unistd.h> // unlink()
18
-
19
- #define ZSV_COMMAND desc
20
- #include "zsv_command.h"
21
-
22
- #include <zsv/utils/writer.h>
23
- #include <zsv/utils/file.h>
24
- #include <zsv/utils/mem.h>
25
- #include <zsv/utils/string.h>
26
-
27
- #define ZSV_DESC_MAX_COLS_DEFAULT 32768
28
- #define ZSV_DESC_MAX_COLS_DEFAULT_S "32768"
29
-
30
- #define ZSV_DESC_FLAG_MINMAX 1
31
- #define ZSV_DESC_FLAG_MINMAXLEN 2
32
- #define ZSV_DESC_FLAG_UNIQUE 32
33
- #define ZSV_DESC_FLAG_UNIQUE_CI 64
34
-
35
- struct zsv_desc_string_list {
36
- struct zsv_desc_string_list *next;
37
- unsigned char *value;
38
- size_t count;
39
- };
40
-
41
- void zsv_desc_string_list_free(struct zsv_desc_string_list *e) {
42
- struct zsv_desc_string_list *n;
43
- for (; e; e = n) {
44
- n = e->next;
45
- if (e->value)
46
- free(e->value);
47
- free(e);
48
- }
49
- }
50
-
51
- typedef struct zsv_desc_unique_key {
52
- unsigned char color : 1;
53
- unsigned char _ : 7;
54
- unsigned char *value;
55
- struct zsv_desc_unique_key *left;
56
- struct zsv_desc_unique_key *right;
57
- } zsv_desc_unique_key;
58
-
59
- struct zsv_desc_unique_key_container {
60
- struct zsv_desc_unique_key *key;
61
- size_t max_count;
62
- size_t count;
63
- unsigned char not_enum : 1;
64
- unsigned char dummy : 7;
65
- };
66
-
67
- static struct zsv_desc_unique_key *zsv_desc_unique_key_new(const unsigned char *value, size_t len) {
68
- zsv_desc_unique_key *key = calloc(1, sizeof(*key));
69
- if (!key || !(key->value = malloc(len + 1)))
70
- ; // handle out-of-memory error!
71
- else {
72
- memcpy(key->value, value, len);
73
- key->value[len] = '\0';
74
- }
75
- return key;
76
- }
77
-
78
- static void zsv_desc_unique_key_delete(zsv_desc_unique_key *e) {
79
- if (e)
80
- free(e->value);
81
- free(e);
82
- }
83
-
84
- static int zsv_desc_unique_key_cmp(zsv_desc_unique_key *x, zsv_desc_unique_key *y) {
85
- return strcmp((const char *)x->value, (const char *)y->value);
86
- }
87
-
88
- SGLIB_DEFINE_RBTREE_PROTOTYPES(zsv_desc_unique_key, left, right, color, zsv_desc_unique_key_cmp);
89
- SGLIB_DEFINE_RBTREE_FUNCTIONS(zsv_desc_unique_key, left, right, color, zsv_desc_unique_key_cmp);
90
-
91
- #define ZSV_DESC_MAX_EXAMPLE_COUNT 5 // could make this customizable...
92
- struct zsv_desc_column_data {
93
- char *name;
94
- unsigned int position;
95
-
96
- unsigned char not_unique : 1;
97
- unsigned char not_unique_ci : 1;
98
- unsigned char _ : 6;
99
-
100
- struct zsv_desc_unique_key_container unique_values;
101
- struct zsv_desc_unique_key_container unique_values_ci;
102
- struct zsv_desc_string_list *examples;
103
- struct zsv_desc_string_list **examples_tail;
104
- unsigned int examples_count;
105
-
106
- unsigned int total_count;
107
- struct {
108
- unsigned int count;
109
- } mblank;
110
-
111
- struct {
112
- size_t lo;
113
- size_t hi;
114
- } lengths;
115
- };
116
-
117
- static void zsv_desc_column_data_finalize(struct zsv_desc_column_data *col, unsigned int i) {
118
- col->position = i;
119
- }
120
-
121
- static void zsv_desc_column_unique_values_delete(zsv_desc_unique_key **tree) {
122
- if (tree && *tree) {
123
- struct sglib_zsv_desc_unique_key_iterator it;
124
- struct zsv_desc_unique_key *e;
125
- for (e = sglib_zsv_desc_unique_key_it_init(&it, *tree); e; e = sglib_zsv_desc_unique_key_it_next(&it))
126
- zsv_desc_unique_key_delete(e);
127
- *tree = NULL;
128
- }
129
- }
130
-
131
- static void zsv_desc_column_data_free(struct zsv_desc_column_data *e) {
132
- free(e->name);
133
- zsv_desc_column_unique_values_delete(&e->unique_values.key);
134
- zsv_desc_column_unique_values_delete(&e->unique_values_ci.key);
135
- zsv_desc_string_list_free(e->examples);
136
- }
137
-
138
- struct zsv_desc_column_name {
139
- struct zsv_desc_column_name *next;
140
- char *name;
141
- };
142
-
143
- static void zsv_desc_column_names_delete(struct zsv_desc_column_name **p) {
144
- if (p && *p) {
145
- struct zsv_desc_column_name *next;
146
- for (struct zsv_desc_column_name *e = *p; e; e = next) {
147
- next = e->next;
148
- free(e->name);
149
- free(e);
150
- }
151
- *p = NULL;
152
- }
153
- }
154
-
155
- enum zsv_desc_status {
156
- zsv_desc_status_ok = 0,
157
- zsv_desc_status_error, // generic error
158
- zsv_desc_status_memory,
159
- zsv_desc_status_file,
160
- zsv_desc_status_argument
161
- };
162
-
163
- struct zsv_desc_data {
164
- struct zsv_opts *opts;
165
- const char *input_filename;
166
- zsv_csv_writer csv_writer;
167
-
168
- char header_only;
169
- char *filename;
170
-
171
- void (*header_func)(void *ctx, unsigned int col_ix, const char *name); // api use only
172
- void *header_func_arg;
173
- unsigned int errcount;
174
-
175
- unsigned int max_cols;
176
- unsigned int current_column_ix;
177
-
178
- struct zsv_desc_column_name *column_names;
179
- struct zsv_desc_column_name **column_names_tail;
180
-
181
- unsigned int col_count;
182
- struct zsv_desc_column_data *columns;
183
-
184
- #define ZSV_DESC_MAX_ENUM_DEFAULT 100
185
- size_t max_enum;
186
- size_t row_count;
187
-
188
- char *err_msg;
189
- enum zsv_desc_status err;
190
-
191
- zsv_parser parser;
192
-
193
- unsigned char flags; // see ZSV_DESC_FLAG_XXX
194
- unsigned char done;
195
-
196
- size_t max_row_size;
197
-
198
- char *overflowed;
199
- size_t overflow_count;
200
-
201
- unsigned char quick : 1;
202
- unsigned char _ : 7;
203
- };
204
-
205
- static void zsv_desc_finalize(struct zsv_desc_data *data) {
206
- for (unsigned int i = 0; i < data->col_count; i++)
207
- zsv_desc_column_data_finalize(&data->columns[i], i);
208
- }
209
-
210
- static void write_headers(struct zsv_desc_data *data) {
211
- // TO DO: adjust header for ZSV_DESC_FLAG options
212
- const char *headers1[] = {"#", "Column name", "Min Length", "Max Length", NULL};
213
- const char *headers2[] = {"Count", "Blank %", "Example 1", "Example 2", "Example 3", "Example 4", "Example 5", NULL};
214
- for (int i = 0; headers1[i]; i++)
215
- zsv_writer_cell(data->csv_writer, i == 0, (const unsigned char *)headers1[i], strlen(headers1[i]), 1);
216
-
217
- if (data->flags & ZSV_DESC_FLAG_UNIQUE)
218
- zsv_writer_cell_s(data->csv_writer, 0, (const unsigned char *)"Unique", 0);
219
-
220
- if (data->flags & ZSV_DESC_FLAG_UNIQUE_CI)
221
- zsv_writer_cell_s(data->csv_writer, 0, (const unsigned char *)"Unique (case-insensitive)", 0);
222
-
223
- for (int i = 0; headers2[i]; i++)
224
- zsv_writer_cell(data->csv_writer, 0, (const unsigned char *)headers2[i], strlen(headers2[i]), 1);
225
- }
226
-
227
- static void zsv_desc_print(struct zsv_desc_data *data) {
228
- if (data->header_only) {
229
- for (unsigned int i = 0; i < data->col_count; i++) {
230
- struct zsv_desc_column_data *c = &data->columns[i];
231
- zsv_writer_cell(data->csv_writer, 1, (const unsigned char *)c->name, c->name ? strlen(c->name) : 0, 1);
232
- }
233
- } else {
234
- write_headers(data);
235
- for (unsigned int i = 0; i < data->col_count; i++) {
236
- struct zsv_desc_column_data *c = &data->columns[i];
237
- zsv_writer_cell_zu(data->csv_writer, 1, i + 1);
238
- zsv_writer_cell_s(data->csv_writer, 0, (unsigned char *)c->name, 1);
239
- if (c->lengths.lo) {
240
- zsv_writer_cell_zu(data->csv_writer, 0, c->lengths.lo);
241
- zsv_writer_cell_zu(data->csv_writer, 0, c->lengths.hi);
242
- } else {
243
- zsv_writer_cell(data->csv_writer, 0, NULL, 0, 0);
244
- zsv_writer_cell(data->csv_writer, 0, NULL, 0, 0);
245
- }
246
-
247
- // unique
248
- if (data->flags & ZSV_DESC_FLAG_UNIQUE) {
249
- const char *s = c->not_unique ? "FALSE" : "TRUE";
250
- zsv_writer_cell_s(data->csv_writer, 0, (const unsigned char *)s, 0);
251
- }
252
-
253
- // unique_ci
254
- if (data->flags & ZSV_DESC_FLAG_UNIQUE_CI) {
255
- const char *s = c->not_unique_ci ? "FALSE" : "TRUE";
256
- zsv_writer_cell_s(data->csv_writer, 0, (const unsigned char *)s, 0);
257
- }
258
-
259
- // count, blank %
260
- zsv_writer_cell_zu(data->csv_writer, 0, c->total_count);
261
- zsv_writer_cell_Lf(data->csv_writer, 0, ".2",
262
- ((long double)c->mblank.count) / (long double)(c->total_count) * (long double)100);
263
-
264
- for (struct zsv_desc_string_list *sl = c->examples; sl; sl = sl->next) {
265
- if (sl->count) {
266
- char *tmp;
267
- asprintf(&tmp, "%s (%zu)", sl->value, sl->count + 1);
268
- zsv_writer_cell_s(data->csv_writer, 0, (unsigned char *)tmp, 1);
269
- free(tmp);
270
- } else
271
- zsv_writer_cell_s(data->csv_writer, 0, sl->value, 1);
272
- }
273
- }
274
- }
275
- }
276
-
277
- static void zsv_desc_set_err(struct zsv_desc_data *data, enum zsv_desc_status err, char *msg) {
278
- data->err = err;
279
- if (msg) {
280
- if (data->err_msg)
281
- free(msg);
282
- else
283
- data->err_msg = msg;
284
- }
285
- }
286
-
287
- // zsv_desc_column_update_unique(): return 1 if unique, 0 if dupe
288
- static int zsv_desc_column_update_unique(struct zsv_desc_unique_key_container *key_container,
289
- const unsigned char *utf8_value, size_t len) {
290
- zsv_desc_unique_key *key = zsv_desc_unique_key_new(utf8_value, len);
291
- if (sglib_zsv_desc_unique_key_find_member(key_container->key, key)) { // not unique
292
- if (key_container->count > key_container->max_count) {
293
- zsv_desc_column_unique_values_delete(&key_container->key);
294
- key_container->not_enum = 1;
295
- }
296
- zsv_desc_unique_key_delete(key);
297
- return 0;
298
- } else {
299
- sglib_zsv_desc_unique_key_add(&key_container->key, key);
300
- key_container->count++;
301
- return 1;
302
- }
303
- }
304
-
305
- static void zsv_desc_cell(void *ctx, unsigned char *restrict utf8_value, size_t len) {
306
- struct zsv_desc_data *data = ctx;
307
- if (!data || data->err || data->done)
308
- return;
309
-
310
- // trim the cell values, so we don't count e.g. " abc" as different from "abc"
311
- utf8_value = (unsigned char *)zsv_strtrim(utf8_value, &len);
312
- if (data->row_count == 0) {
313
- if (data->current_column_ix < data->max_cols) {
314
- struct zsv_desc_column_name *e = calloc(1, sizeof(*e));
315
- if (!e) {
316
- zsv_desc_set_err(data, zsv_desc_status_memory, NULL);
317
- return;
318
- }
319
-
320
- if (len)
321
- e->name = zsv_memdup(utf8_value, len);
322
- *data->column_names_tail = e;
323
- data->column_names_tail = &e->next;
324
- }
325
- } else {
326
- if (data->current_column_ix < data->col_count) {
327
- struct zsv_desc_column_data *col = &data->columns[data->current_column_ix];
328
- if (col) {
329
- col->total_count++;
330
- if (!len)
331
- col->mblank.count++;
332
- else {
333
- if (col->lengths.lo == 0 || len < col->lengths.lo)
334
- col->lengths.lo = len;
335
- if (len > col->lengths.hi)
336
- col->lengths.hi = len;
337
- if (col->examples_count < ZSV_DESC_MAX_EXAMPLE_COUNT || !data->quick) {
338
- char already_have = 0;
339
- if (!col->examples_tail)
340
- col->examples_tail = &col->examples;
341
- for (struct zsv_desc_string_list *sl = col->examples; !already_have && sl; sl = sl->next) {
342
- if (sl->value && !zsv_strincmp(utf8_value, len, sl->value, strlen((char *)sl->value))) {
343
- already_have = 1;
344
- sl->count++;
345
- }
346
- }
347
- if (!already_have && col->examples_count < ZSV_DESC_MAX_EXAMPLE_COUNT) {
348
- struct zsv_desc_string_list *sl;
349
- if ((sl = *col->examples_tail = calloc(1, sizeof(*sl)))) {
350
- col->examples_tail = &sl->next;
351
- sl->value = zsv_memdup(utf8_value, len);
352
- col->examples_count++;
353
- }
354
- }
355
- }
356
-
357
- if (data->flags & ZSV_DESC_FLAG_UNIQUE) {
358
- if (!col->not_unique)
359
- if (!zsv_desc_column_update_unique(&col->unique_values, utf8_value, len)) // dupe
360
- col->not_unique = 1;
361
- }
362
-
363
- if (data->flags & ZSV_DESC_FLAG_UNIQUE_CI) {
364
- if (!col->not_unique_ci || !col->unique_values_ci.not_enum
365
- // )
366
- ) {
367
- unsigned char *lc = zsv_strtolowercase(utf8_value, &len);
368
- if (lc) {
369
- if (!zsv_desc_column_update_unique(&col->unique_values_ci, lc, len))
370
- col->not_unique_ci = 1;
371
- free(lc);
372
- }
373
- }
374
- }
375
- }
376
- }
377
- }
378
- }
379
- data->current_column_ix++;
380
- }
381
-
382
- static void zsv_desc_row(void *ctx) {
383
- struct zsv_desc_data *data = ctx;
384
-
385
- if (!data || data->err)
386
- return;
387
-
388
- if (data->row_count == 0) {
389
- if (data->current_column_ix < data->max_cols)
390
- data->col_count = data->current_column_ix;
391
- else
392
- data->current_column_ix = data->max_cols;
393
- if (!(data->columns = calloc(data->col_count, sizeof(*data->columns)))) {
394
- zsv_desc_set_err(data, zsv_desc_status_memory, NULL);
395
- return;
396
- }
397
-
398
- struct zsv_desc_column_name *cn = data->column_names;
399
- for (unsigned int i = 0; i < data->col_count && cn; cn = cn->next, i++) {
400
- struct zsv_desc_column_data *col = &data->columns[i];
401
- if (cn->name && *cn->name) {
402
- col->name = cn->name;
403
- cn->name = NULL;
404
- }
405
- col->unique_values_ci.max_count = data->max_enum;
406
- }
407
-
408
- if (data->header_only) {
409
- data->done = 1;
410
- zsv_abort(data->parser);
411
- }
412
- } else {
413
- if (data->row_count % 50000 == 0 && data->opts->verbose)
414
- fprintf(stderr, "%zu rows read\n", data->row_count);
415
- }
416
-
417
- data->current_column_ix = 0;
418
- ++data->row_count;
419
- }
420
-
421
- const char *zsv_desc_usage_msg[] = {
422
- APPNAME ": get column-level information about a table's content",
423
- "",
424
- "Usage: " APPNAME " [options] <filename>",
425
- "",
426
- "Options:",
427
- " -b,--with-bom : output with BOM",
428
- " -C <max_num_of_columns> : maximum number of columns (default: " ZSV_DESC_MAX_COLS_DEFAULT_S ")",
429
- " -H : output header names only",
430
- " -q,--quick : minimize example counts",
431
- " -a,--all : calculate all metadata (for now, this only adds uniqueness info)",
432
- " -o <filename> : filename to save output to (default: stdout)",
433
- NULL,
434
- };
435
-
436
- static int zsv_desc_usage(void) {
437
- for (size_t i = 0; zsv_desc_usage_msg[i]; i++)
438
- fprintf(stdout, "%s\n", zsv_desc_usage_msg[i]);
439
- return 0;
440
- }
441
-
442
- static void zsv_desc_cleanup(struct zsv_desc_data *data) {
443
- if (data->columns) {
444
- for (unsigned int i = 0; i < data->col_count; i++)
445
- zsv_desc_column_data_free(&data->columns[i]);
446
- free(data->columns);
447
- data->columns = NULL;
448
- }
449
-
450
- zsv_desc_column_names_delete(&data->column_names);
451
- free(data->err_msg);
452
- data->err_msg = NULL;
453
-
454
- if (data->opts->stream && data->opts->stream != stdin) {
455
- fclose(data->opts->stream);
456
- data->opts->stream = NULL;
457
- }
458
-
459
- if (data->overflowed) {
460
- fprintf(stderr, "Warning: data overflowed %zu times (example: %s)\n", data->overflow_count, data->overflowed);
461
- free(data->overflowed);
462
- }
463
- zsv_writer_delete(data->csv_writer);
464
- }
465
-
466
- #define ZSV_DESC_TMPFN_TEMPLATE "zsv_desc_XXXXXXXXXXXX"
467
-
468
- static void zsv_desc_execute(struct zsv_desc_data *data, struct zsv_prop_handler *custom_prop_handler,
469
- const char *input_path) {
470
- data->opts->cell_handler = zsv_desc_cell;
471
- data->opts->row_handler = zsv_desc_row;
472
- data->opts->ctx = data;
473
-
474
- if (!data->max_enum)
475
- data->max_enum = ZSV_DESC_MAX_ENUM_DEFAULT;
476
- if (zsv_new_with_properties(data->opts, custom_prop_handler, input_path, &data->parser) == zsv_status_ok) {
477
- FILE *input_temp_file = NULL;
478
- enum zsv_status status;
479
- if (input_temp_file)
480
- zsv_set_scan_filter(data->parser, zsv_filter_write, input_temp_file);
481
- while (!zsv_signal_interrupted && (status = zsv_parse_more(data->parser)) == zsv_status_ok)
482
- ;
483
-
484
- if (input_temp_file)
485
- fclose(input_temp_file);
486
- zsv_finish(data->parser);
487
- zsv_delete(data->parser);
488
- }
489
- }
490
-
491
- int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts,
492
- struct zsv_prop_handler *custom_prop_handler) {
493
- if (argc < 1)
494
- zsv_desc_usage();
495
- else if (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")))
496
- zsv_desc_usage();
497
- else {
498
- struct zsv_desc_data data = {0};
499
- const char *input_path = NULL;
500
- int err = 0;
501
- if (opts->malformed_utf8_replace != ZSV_MALFORMED_UTF8_DO_NOT_REPLACE) // user specified to be 'none'
502
- opts->malformed_utf8_replace = '?';
503
-
504
- data.opts = opts;
505
- data.max_cols = ZSV_DESC_MAX_COLS_DEFAULT; // default
506
- data.column_names_tail = &data.column_names;
507
-
508
- struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
509
-
510
- for (int arg_i = 1; !err && !data.err && arg_i < argc; arg_i++) {
511
- if (!strcmp(argv[arg_i], "-b") || !strcmp(argv[arg_i], "--with-bom"))
512
- writer_opts.with_bom = 1;
513
- else if (!strcmp(argv[arg_i], "-o") || !strcmp(argv[arg_i], "--output"))
514
- writer_opts.output_path = zsv_next_arg(++arg_i, argc, argv, (int *)&data.err);
515
- else if (!strcmp(argv[arg_i], "-a") || !strcmp(argv[arg_i], "--all"))
516
- data.flags = 0xff;
517
- else if (!strcmp(argv[arg_i], "-q") || !strcmp(argv[arg_i], "--quick"))
518
- data.quick = 1;
519
- else if (!strcmp(argv[arg_i], "-H"))
520
- data.header_only = 1;
521
- else if (!strcmp(argv[arg_i], "-C")) {
522
- arg_i++;
523
- if (!(arg_i < argc && atoi(argv[arg_i]) > 9))
524
- data.err = zsv_printerr(zsv_desc_status_error,
525
- "-C (max cols) invalid: should be positive integer > 9 (got %s)", argv[arg_i]);
526
- else
527
- data.max_cols = atoi(argv[arg_i]);
528
- } else {
529
- if (data.opts->stream) {
530
- err = 1;
531
- fprintf(stderr, "Input file specified twice, or unrecognized argument: %s\n", argv[arg_i]);
532
- } else if (!(data.opts->stream = fopen(argv[arg_i], "rb"))) {
533
- err = 1;
534
- fprintf(stderr, "Could not open for reading: %s\n", argv[arg_i]);
535
- } else
536
- input_path = argv[arg_i];
537
-
538
- if (data.opts->stream && data.opts->stream != stdin)
539
- data.input_filename = argv[arg_i];
540
- if (err)
541
- data.err = err;
542
- }
543
- }
544
-
545
- zsv_handle_ctrl_c_signal();
546
-
547
- if (!data.err && !(data.csv_writer = zsv_writer_new(&writer_opts)))
548
- data.err = zsv_printerr(zsv_desc_status_error, "Unable to create csv writer");
549
-
550
- if (!data.opts->stream) {
551
- #ifdef NO_STDIN
552
- data.err = zsv_printerr(zsv_desc_status_error, "Please specify an input file");
553
- #else
554
- data.opts->stream = stdin;
555
- #endif
556
- }
557
-
558
- if (data.err) {
559
- zsv_desc_cleanup(&data);
560
- return 1;
561
- }
562
-
563
- zsv_desc_execute(&data, custom_prop_handler, input_path);
564
- zsv_desc_finalize(&data);
565
- zsv_desc_print(&data);
566
- zsv_desc_cleanup(&data);
567
- }
568
- return 0;
569
- }