zsv 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +36 -0
  3. data/LICENSE +21 -0
  4. data/README.md +311 -0
  5. data/ext/zsv/common.h +34 -0
  6. data/ext/zsv/extconf.rb +137 -0
  7. data/ext/zsv/options.c +126 -0
  8. data/ext/zsv/options.h +31 -0
  9. data/ext/zsv/options_internal.h +8 -0
  10. data/ext/zsv/parser.c +300 -0
  11. data/ext/zsv/parser.h +62 -0
  12. data/ext/zsv/row.c +122 -0
  13. data/ext/zsv/row.h +39 -0
  14. data/ext/zsv/vendor/zsv-1.3.0/app/2db.c +756 -0
  15. data/ext/zsv/vendor/zsv-1.3.0/app/2json.c +381 -0
  16. data/ext/zsv/vendor/zsv-1.3.0/app/2tsv.c +228 -0
  17. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/help.c +123 -0
  18. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/license.c +39 -0
  19. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/register.c +104 -0
  20. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/thirdparty.c +41 -0
  21. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/unregister.c +1 -0
  22. data/ext/zsv/vendor/zsv-1.3.0/app/builtin/version.c +14 -0
  23. data/ext/zsv/vendor/zsv-1.3.0/app/check/simdutf_wrapper.h +19 -0
  24. data/ext/zsv/vendor/zsv-1.3.0/app/check/utf8.c +116 -0
  25. data/ext/zsv/vendor/zsv-1.3.0/app/check.c +194 -0
  26. data/ext/zsv/vendor/zsv-1.3.0/app/cli.c +796 -0
  27. data/ext/zsv/vendor/zsv-1.3.0/app/cli_const.h +41 -0
  28. data/ext/zsv/vendor/zsv-1.3.0/app/cli_export.h +16 -0
  29. data/ext/zsv/vendor/zsv-1.3.0/app/cli_ini.c +280 -0
  30. data/ext/zsv/vendor/zsv-1.3.0/app/cli_internal.h +36 -0
  31. data/ext/zsv/vendor/zsv-1.3.0/app/compare.c +913 -0
  32. data/ext/zsv/vendor/zsv-1.3.0/app/compare.h +23 -0
  33. data/ext/zsv/vendor/zsv-1.3.0/app/compare_added_column.c +20 -0
  34. data/ext/zsv/vendor/zsv-1.3.0/app/compare_internal.h +140 -0
  35. data/ext/zsv/vendor/zsv-1.3.0/app/compare_sort.c +91 -0
  36. data/ext/zsv/vendor/zsv-1.3.0/app/compare_unique_colname.c +81 -0
  37. data/ext/zsv/vendor/zsv-1.3.0/app/count-pull.c +82 -0
  38. data/ext/zsv/vendor/zsv-1.3.0/app/count.c +404 -0
  39. data/ext/zsv/vendor/zsv-1.3.0/app/desc.c +569 -0
  40. data/ext/zsv/vendor/zsv-1.3.0/app/echo.c +365 -0
  41. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/my_extension.c +366 -0
  42. data/ext/zsv/vendor/zsv-1.3.0/app/ext_example/mysheet_extension.c +341 -0
  43. data/ext/zsv/vendor/zsv-1.3.0/app/ext_template/YOUR_EXTENSION_zsvext.c +263 -0
  44. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.c +298 -0
  45. data/ext/zsv/vendor/zsv-1.3.0/app/external/inih/ini.h +157 -0
  46. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/json_numeric.c +177 -0
  47. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.c +444 -0
  48. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/jsonwriter.h +145 -0
  49. data/ext/zsv/vendor/zsv-1.3.0/app/external/json_writer-1.01/utils.c +110 -0
  50. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/include/memfile.h +15 -0
  51. data/ext/zsv/vendor/zsv-1.3.0/app/external/memfile-1.0/src/memfile.c +64 -0
  52. data/ext/zsv/vendor/zsv-1.3.0/app/external/sglib/sglib.h +1955 -0
  53. data/ext/zsv/vendor/zsv-1.3.0/app/external/simdutf/simdutf.h +6802 -0
  54. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.c +230517 -0
  55. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3.h +12174 -0
  56. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_and_csv_vtab.c +2 -0
  57. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.c +142 -0
  58. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-mem.h +49 -0
  59. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab-zsv.c +485 -0
  60. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3_csv_vtab.c +1015 -0
  61. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/sqlite3ext.h +663 -0
  62. data/ext/zsv/vendor/zsv-1.3.0/app/external/sqlite3/vtab_helper.c +85 -0
  63. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_common.h +75 -0
  64. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_gen.h +167 -0
  65. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_parse.h +228 -0
  66. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_tree.h +186 -0
  67. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/build/yajl-2.1.1/include/yajl/yajl_version.h +23 -0
  68. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_common.h +76 -0
  69. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_gen.h +167 -0
  70. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_parse.h +238 -0
  71. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/api/yajl_tree.h +186 -0
  72. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl.c +184 -0
  73. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.c +52 -0
  74. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_alloc.h +34 -0
  75. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.c +103 -0
  76. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_buf.h +57 -0
  77. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_bytestack.h +69 -0
  78. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.c +220 -0
  79. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_encode.h +34 -0
  80. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_gen.c +362 -0
  81. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.c +764 -0
  82. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_lex.h +117 -0
  83. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.c +508 -0
  84. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_parser.h +78 -0
  85. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_tree.c +505 -0
  86. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl/src/yajl_version.c +7 -0
  87. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/json_value.h +59 -0
  88. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper/yajl_helper.h +208 -0
  89. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper.c +795 -0
  90. data/ext/zsv/vendor/zsv-1.3.0/app/external/yajl_helper/yajl_helper_internal.h +28 -0
  91. data/ext/zsv/vendor/zsv-1.3.0/app/flatten.c +851 -0
  92. data/ext/zsv/vendor/zsv-1.3.0/app/jq.c +106 -0
  93. data/ext/zsv/vendor/zsv-1.3.0/app/jq.h +6 -0
  94. data/ext/zsv/vendor/zsv-1.3.0/app/mv.c +113 -0
  95. data/ext/zsv/vendor/zsv-1.3.0/app/noop.c +90 -0
  96. data/ext/zsv/vendor/zsv-1.3.0/app/overwrite.c +295 -0
  97. data/ext/zsv/vendor/zsv-1.3.0/app/paste.c +175 -0
  98. data/ext/zsv/vendor/zsv-1.3.0/app/pretty.c +693 -0
  99. data/ext/zsv/vendor/zsv-1.3.0/app/prop.c +980 -0
  100. data/ext/zsv/vendor/zsv-1.3.0/app/rm.c +131 -0
  101. data/ext/zsv/vendor/zsv-1.3.0/app/select/fixed.c +130 -0
  102. data/ext/zsv/vendor/zsv-1.3.0/app/select/internal.h +118 -0
  103. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.c +45 -0
  104. data/ext/zsv/vendor/zsv-1.3.0/app/select/parallel.h +41 -0
  105. data/ext/zsv/vendor/zsv-1.3.0/app/select/processing.c +107 -0
  106. data/ext/zsv/vendor/zsv-1.3.0/app/select/rand.c +20 -0
  107. data/ext/zsv/vendor/zsv-1.3.0/app/select/regex.c +61 -0
  108. data/ext/zsv/vendor/zsv-1.3.0/app/select/search.c +14 -0
  109. data/ext/zsv/vendor/zsv-1.3.0/app/select/selection.c +192 -0
  110. data/ext/zsv/vendor/zsv-1.3.0/app/select/usage.c +72 -0
  111. data/ext/zsv/vendor/zsv-1.3.0/app/select-pull.c +812 -0
  112. data/ext/zsv/vendor/zsv-1.3.0/app/select.c +753 -0
  113. data/ext/zsv/vendor/zsv-1.3.0/app/serialize.c +372 -0
  114. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/curses.h +15 -0
  115. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/cursor.c +119 -0
  116. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/errors.c +45 -0
  117. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.c +63 -0
  118. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/file.h +12 -0
  119. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/filter.c +166 -0
  120. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers.c +214 -0
  121. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/handlers_internal.h +128 -0
  122. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/help.c +43 -0
  123. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.c +81 -0
  124. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/index.h +25 -0
  125. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.c +325 -0
  126. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/key-bindings.h +73 -0
  127. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/lexer.c +203 -0
  128. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/newline_handler.c +7 -0
  129. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/pivot.c +318 -0
  130. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.c +134 -0
  131. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/procedure.h +119 -0
  132. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/read-data.c +322 -0
  133. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.c +203 -0
  134. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/screen_buffer.h +36 -0
  135. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet-sql.c +167 -0
  136. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sheet_internal.h +36 -0
  137. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/sqlfilter.c +153 -0
  138. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/terminfo.c +32 -0
  139. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.c +312 -0
  140. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/transformation.h +29 -0
  141. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/ui_buffer.c +266 -0
  142. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/usage.c +9 -0
  143. data/ext/zsv/vendor/zsv-1.3.0/app/sheet/utf8-width.c +60 -0
  144. data/ext/zsv/vendor/zsv-1.3.0/app/sheet.c +1007 -0
  145. data/ext/zsv/vendor/zsv-1.3.0/app/sql.c +453 -0
  146. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.c +101 -0
  147. data/ext/zsv/vendor/zsv-1.3.0/app/sql_internal.h +49 -0
  148. data/ext/zsv/vendor/zsv-1.3.0/app/stack.c +393 -0
  149. data/ext/zsv/vendor/zsv-1.3.0/app/utils/arg.c +322 -0
  150. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cache.c +228 -0
  151. data/ext/zsv/vendor/zsv-1.3.0/app/utils/cat.c +91 -0
  152. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.c +240 -0
  153. data/ext/zsv/vendor/zsv-1.3.0/app/utils/chunk.h +63 -0
  154. data/ext/zsv/vendor/zsv-1.3.0/app/utils/clock.c +57 -0
  155. data/ext/zsv/vendor/zsv-1.3.0/app/utils/db.c +148 -0
  156. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs-no-jq.c +2 -0
  157. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs.c +427 -0
  158. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_from_json.c +253 -0
  159. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dirs_to_json.c +121 -0
  160. data/ext/zsv/vendor/zsv-1.3.0/app/utils/dl.c +20 -0
  161. data/ext/zsv/vendor/zsv-1.3.0/app/utils/emcc/fs_api.c +159 -0
  162. data/ext/zsv/vendor/zsv-1.3.0/app/utils/err.c +24 -0
  163. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file-mem.c +180 -0
  164. data/ext/zsv/vendor/zsv-1.3.0/app/utils/file.c +256 -0
  165. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.c +197 -0
  166. data/ext/zsv/vendor/zsv-1.3.0/app/utils/index.h +49 -0
  167. data/ext/zsv/vendor/zsv-1.3.0/app/utils/jq.c +400 -0
  168. data/ext/zsv/vendor/zsv-1.3.0/app/utils/json.c +120 -0
  169. data/ext/zsv/vendor/zsv-1.3.0/app/utils/mem.c +18 -0
  170. data/ext/zsv/vendor/zsv-1.3.0/app/utils/memmem.c +132 -0
  171. data/ext/zsv/vendor/zsv-1.3.0/app/utils/os.c +178 -0
  172. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite.c +258 -0
  173. data/ext/zsv/vendor/zsv-1.3.0/app/utils/overwrite_writer.c +246 -0
  174. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8-test.c +123 -0
  175. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.c +153 -0
  176. data/ext/zsv/vendor/zsv-1.3.0/app/utils/pcre2-8/pcre2-8.h +54 -0
  177. data/ext/zsv/vendor/zsv-1.3.0/app/utils/prop.c +267 -0
  178. data/ext/zsv/vendor/zsv-1.3.0/app/utils/signal.c +53 -0
  179. data/ext/zsv/vendor/zsv-1.3.0/app/utils/string.c +357 -0
  180. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dir_exists_longpath.c +83 -0
  181. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/dl.c +33 -0
  182. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/fopen_longpath.c +184 -0
  183. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/foreach_dirent_longpath.c +292 -0
  184. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.c +259 -0
  185. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/io.h +13 -0
  186. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/mkdir_longpath.c +255 -0
  187. data/ext/zsv/vendor/zsv-1.3.0/app/utils/win/remove_longpath.c +96 -0
  188. data/ext/zsv/vendor/zsv-1.3.0/app/utils/writer.c +361 -0
  189. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command.h +40 -0
  190. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_command_standalone.c +16 -0
  191. data/ext/zsv/vendor/zsv-1.3.0/app/zsv_main.h +44 -0
  192. data/ext/zsv/vendor/zsv-1.3.0/examples/js/zsv_parser_api_dummy.c +3 -0
  193. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/parse_by_chunk.c +100 -0
  194. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/print_my_column.c +143 -0
  195. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/pull.c +89 -0
  196. data/ext/zsv/vendor/zsv-1.3.0/examples/lib/simple.c +123 -0
  197. data/ext/zsv/vendor/zsv-1.3.0/fuzz/fuzz.c +16 -0
  198. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/api.h +336 -0
  199. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/common.h +361 -0
  200. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation.h +62 -0
  201. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/implementation_private.h +113 -0
  202. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext/sheet.h +73 -0
  203. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/ext.h +329 -0
  204. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/arg.h +90 -0
  205. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/cache.h +49 -0
  206. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/clock.h +36 -0
  207. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/compiler.h +58 -0
  208. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/db.h +19 -0
  209. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dirs.h +147 -0
  210. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/dl.h +22 -0
  211. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/emcc/fs_api.h +28 -0
  212. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/err.h +22 -0
  213. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file-mem.h +17 -0
  214. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/file.h +99 -0
  215. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/jq.h +65 -0
  216. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/json.h +19 -0
  217. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/mem.h +19 -0
  218. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/memmem.h +13 -0
  219. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/os.h +54 -0
  220. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite.h +71 -0
  221. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/overwrite_writer.h +53 -0
  222. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/prop.h +107 -0
  223. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/signal.h +18 -0
  224. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/sql.h +11 -0
  225. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/string.h +148 -0
  226. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/utf8.h +41 -0
  227. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/win/dl.h +25 -0
  228. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/utils/writer.h +101 -0
  229. data/ext/zsv/vendor/zsv-1.3.0/include/zsv/zsv_export.h +33 -0
  230. data/ext/zsv/vendor/zsv-1.3.0/include/zsv.h +20 -0
  231. data/ext/zsv/vendor/zsv-1.3.0/src/vector_delim.c +60 -0
  232. data/ext/zsv/vendor/zsv-1.3.0/src/zsv.c +484 -0
  233. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_internal.c +731 -0
  234. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_delim.c +285 -0
  235. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_scan_fixed.c +88 -0
  236. data/ext/zsv/vendor/zsv-1.3.0/src/zsv_strencode.c +51 -0
  237. data/ext/zsv/zsv_ext.c +343 -0
  238. data/lib/zsv/version.rb +5 -0
  239. data/lib/zsv.rb +81 -0
  240. metadata +340 -0
@@ -0,0 +1,1015 @@
1
+ /*
2
+ ** 2016-05-28
3
+ **
4
+ ** The author disclaims copyright to this source code. In place of
5
+ ** a legal notice, here is a blessing:
6
+ **
7
+ ** May you do good and not evil.
8
+ ** May you find forgiveness for yourself and forgive others.
9
+ ** May you share freely, never taking more than you give.
10
+ **
11
+ ******************************************************************************
12
+ **
13
+ ** This file contains the implementation of an SQLite virtual table for
14
+ ** reading CSV files.
15
+ **
16
+ ** Usage:
17
+ **
18
+ ** .load ./csv
19
+ ** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
20
+ ** SELECT * FROM csv;
21
+ **
22
+ ** The columns are named "c1", "c2", "c3", ... by default. Or the
23
+ ** application can define its own CREATE TABLE statement using the
24
+ ** schema= parameter, like this:
25
+ **
26
+ ** CREATE VIRTUAL TABLE temp.csv2 USING csv(
27
+ ** filename = "../http.log",
28
+ ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
29
+ ** );
30
+ **
31
+ ** Instead of specifying a file, the text of the CSV can be loaded using
32
+ ** the data= parameter.
33
+ **
34
+ ** If the columns=N parameter is supplied, then the CSV file is assumed to have
35
+ ** N columns. If both the columns= and schema= parameters are omitted, then
36
+ ** the number and names of the columns is determined by the first line of
37
+ ** the CSV input.
38
+ **
39
+ ** Some extra debugging features (used for testing virtual tables) are available
40
+ ** if this module is compiled with -DSQLITE_TEST.
41
+ */
42
+ #include "sqlite3.h"
43
+ #include "sqlite3ext.h"
44
+ SQLITE_EXTENSION_INIT1
45
+ #include <string.h>
46
+ #include <stdlib.h>
47
+ #include <assert.h>
48
+ #include <stdarg.h>
49
+ #include <ctype.h>
50
+ #include <stdio.h>
51
+
52
+ #ifndef SQLITE_OMIT_VIRTUALTABLE
53
+
54
+ /*
55
+ ** A macro to hint to the compiler that a function should not be
56
+ ** inlined.
57
+ */
58
+ #if defined(__GNUC__)
59
+ # define CSV_NOINLINE __attribute__((noinline))
60
+ #elif defined(_MSC_VER) && _MSC_VER>=1310
61
+ # define CSV_NOINLINE __declspec(noinline)
62
+ #else
63
+ # define CSV_NOINLINE
64
+ #endif
65
+
66
+
67
/* Capacity, in bytes, of the CsvReader.zErr[] error-message buffer */
#define CSV_MXERR 200

/* Capacity, in bytes, of the buffer used when CSV text is read from a file */
#define CSV_INBUFSZ 1024

/*
** Tokenizer state used while reading CSV text.
**
** Input is consumed from zIn[iIn..nIn-1].  When "in" is non-NULL the
** buffer is refilled from that stream as it drains.  The put/put_value
** pair is a one-character push-back slot used after a '\r' has been read
** and the character following it turned out not to be '\n'.
*/
typedef struct CsvReader {
  FILE *in;              /* Stream the CSV text is read from, or NULL */
  char *z;               /* Text accumulated for the current field */
  int n;                 /* Bytes currently stored in z[] */
  int nAlloc;            /* Bytes allocated for z[] */
  int nLine;             /* Line number of the current position */
  int bNotFirst;         /* Non-zero once any input has been consumed */
  int cTerm;             /* Character that ended the most recent field */
  unsigned char put;     /* Non-zero if put_value is waiting to be re-read */
  int put_value;         /* Pushed-back character; valid only when put!=0 */
  size_t iIn;            /* Index of the next unread byte in zIn[] */
  size_t nIn;            /* Number of valid bytes in zIn[] */
  char *zIn;             /* The input buffer */
  char zErr[CSV_MXERR];  /* Most recent error message */
} CsvReader;
90
+
91
+ /* Initialize a CsvReader object */
92
+ static void csv_reader_init(CsvReader *p){
93
+ p->in = 0;
94
+ p->z = 0;
95
+ p->n = 0;
96
+ p->nAlloc = 0;
97
+ p->nLine = 0;
98
+ p->bNotFirst = 0;
99
+ p->nIn = 0;
100
+ p->zIn = 0;
101
+ p->zErr[0] = 0;
102
+ }
103
+
104
+ /* Close and reset a CsvReader object */
105
+ static void csv_reader_reset(CsvReader *p){
106
+ if( p->in ){
107
+ fclose(p->in);
108
+ sqlite3_free(p->zIn);
109
+ }
110
+ sqlite3_free(p->z);
111
+ csv_reader_init(p);
112
+ }
113
+
114
+ /* Report an error on a CsvReader */
115
+ static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
116
+ va_list ap;
117
+ va_start(ap, zFormat);
118
+ sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
119
+ va_end(ap);
120
+ }
121
+
122
+ /* Open the file associated with a CsvReader
123
+ ** Return the number of errors.
124
+ */
125
+ static int csv_reader_open(
126
+ CsvReader *p, /* The reader to open */
127
+ const char *zFilename, /* Read from this filename */
128
+ const char *zData /* ... or use this data */
129
+ ){
130
+ if( zFilename ){
131
+ p->zIn = sqlite3_malloc( CSV_INBUFSZ );
132
+ if( p->zIn==0 ){
133
+ csv_errmsg(p, "out of memory");
134
+ return 1;
135
+ }
136
+ p->in = fopen(zFilename, "rb");
137
+ if( p->in==0 ){
138
+ sqlite3_free(p->zIn);
139
+ csv_reader_reset(p);
140
+ csv_errmsg(p, "cannot open '%s' for reading", zFilename);
141
+ return 1;
142
+ }
143
+ }else{
144
+ assert( p->in==0 );
145
+ p->zIn = (char*)zData;
146
+ p->nIn = strlen(zData);
147
+ }
148
+ return 0;
149
+ }
150
+
151
+ /* The input buffer has overflowed. Refill the input buffer, then
152
+ ** return the next character
153
+ */
154
+ static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
155
+ size_t got;
156
+
157
+ assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */
158
+ assert( p->in!=0 ); /* Only called if reading froma file */
159
+
160
+ got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
161
+ if( got==0 ) return EOF;
162
+ p->nIn = got;
163
+ p->iIn = 1;
164
+ return p->zIn[0];
165
+ }
166
+
167
+ /* Return the next character of input. Return EOF at end of input. */
168
+ static int csv_getc(CsvReader *p){
169
+ if( p->put ) {
170
+ p->put = 0;
171
+ return p->put_value;
172
+ }
173
+ if( p->iIn >= p->nIn ){
174
+ if( p->in!=0 ) {
175
+ return csv_getc_refill(p);
176
+ }
177
+ return EOF;
178
+ }
179
+ return ((unsigned char*)p->zIn)[p->iIn++];
180
+ }
181
+
182
+ /* Increase the size of p->z and append character c to the end.
183
+ ** Return 0 on success and non-zero if there is an OOM error */
184
+ static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
185
+ char *zNew;
186
+ int nNew = p->nAlloc*2 + 100;
187
+ zNew = sqlite3_realloc64(p->z, nNew);
188
+ if( zNew ){
189
+ p->z = zNew;
190
+ p->nAlloc = nNew;
191
+ p->z[p->n++] = c;
192
+ return 0;
193
+ }else{
194
+ csv_errmsg(p, "out of memory");
195
+ return 1;
196
+ }
197
+ }
198
+
199
+ /* Append a single character to the CsvReader.z[] array.
200
+ ** Return 0 on success and non-zero if there is an OOM error */
201
+ static int csv_append(CsvReader *p, char c){
202
+ if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
203
+ p->z[p->n++] = c;
204
+ return 0;
205
+ }
206
+
207
+ /* Read a single field of CSV text. Compatible with rfc4180 and extended
208
+ ** with the option of having a separator other than ",".
209
+ **
210
+ ** + Input comes from p->in.
211
+ ** + Store results in p->z of length p->n. Space to hold p->z comes
212
+ ** from sqlite3_malloc64().
213
+ ** + Keep track of the line number in p->nLine.
214
+ ** + Store the character that terminates the field in p->cTerm. Store
215
+ ** EOF on end-of-file.
216
+ **
217
+ ** Return 0 at EOF or on OOM. On EOF, the p->cTerm character will have
218
+ ** been set to EOF.
219
+ */
220
+ static char *csv_read_one_field(CsvReader *p){
221
+ int c;
222
+ p->n = 0;
223
+ c = csv_getc(p);
224
+ if( c==EOF ){
225
+ p->cTerm = EOF;
226
+ return 0;
227
+ }
228
+
229
+ int notFirst = p->bNotFirst;
230
+ p->bNotFirst = 1;
231
+
232
+ if( c=='"' ){
233
+ int pc, ppc;
234
+ int startLine = p->nLine;
235
+ pc = ppc = 0;
236
+ while( 1 ){
237
+ c = csv_getc(p);
238
+ if( c<='"' || pc=='"' ){
239
+ if( c=='\n' ) p->nLine++;
240
+ else if( c == '\r') {
241
+ p->nLine++;
242
+ // swallow the following \n, if any
243
+ int next_c = csv_getc(p);
244
+ if(next_c != '\n') {
245
+ p->put = 1;
246
+ p->put_value = next_c;
247
+ }
248
+ } else if( c=='"' ){
249
+ if( pc=='"' ){
250
+ pc = 0;
251
+ continue;
252
+ }
253
+ }
254
+ if( (c==',' && pc=='"')
255
+ || (c=='\n' && pc=='"')
256
+ || (c=='\n' && pc=='\r' && ppc=='"')
257
+ || (c==EOF && pc=='"')
258
+ ){
259
+ do{ p->n--; }while( p->z[p->n]!='"' );
260
+ p->cTerm = (char)c;
261
+ break;
262
+ }
263
+ if( pc=='"' && c!='\r' ){
264
+ csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
265
+ break;
266
+ }
267
+ if( c==EOF ){
268
+ csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
269
+ startLine, '"');
270
+ p->cTerm = (char)c;
271
+ break;
272
+ }
273
+ }
274
+ if( csv_append(p, (char)c) ) return 0;
275
+ ppc = pc;
276
+ pc = c;
277
+ }
278
+ }else{
279
+ /* If this is the first field being parsed and it begins with the
280
+ ** UTF-8 BOM (0xEF BB BF) then skip the BOM */
281
+ if( (c&0xff)==0xef && notFirst==0 ){
282
+ csv_append(p, (char)c);
283
+ c = csv_getc(p);
284
+ if( (c&0xff)==0xbb ){
285
+ csv_append(p, (char)c);
286
+ c = csv_getc(p);
287
+ if( (c&0xff)==0xbf ){
288
+ // p->bNotFirst = 1;
289
+ p->n = 0;
290
+ return csv_read_one_field(p);
291
+ }
292
+ }
293
+ }
294
+ while( c>',' || (c!=EOF && c!=',' && c!='\n' && c!='\r') ){
295
+ if( csv_append(p, (char)c) ) return 0;
296
+ c = csv_getc(p);
297
+ }
298
+ if( c=='\n' || c == '\r' ){
299
+ p->nLine++;
300
+ if(c == '\r') { // swallow the next char, if it's \n
301
+ int next_c = csv_getc(p);
302
+ if(next_c != '\n') {
303
+ p->put = 1;
304
+ p->put_value = next_c;
305
+ }
306
+ }
307
+ }
308
+ p->cTerm = (char)c;
309
+ }
310
+ if( p->z ) p->z[p->n] = 0;
311
+
312
+ else if(!p->n) return ""; // needed to prevent a false EOF signal if the first data row's first cell is 0 length
313
+ // p->bNotFirst = 1;
314
+ return p->z;
315
+ }
316
+
317
+
318
+ /* Forward references to the various virtual table methods implemented
319
+ ** in this file. */
320
+ static int csvtabCreate(sqlite3*, void*, int, const char*const*,
321
+ sqlite3_vtab**,char**);
322
+ static int csvtabConnect(sqlite3*, void*, int, const char*const*,
323
+ sqlite3_vtab**,char**);
324
+ static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
325
+ static int csvtabDisconnect(sqlite3_vtab*);
326
+ static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
327
+ static int csvtabClose(sqlite3_vtab_cursor*);
328
+ static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
329
+ int argc, sqlite3_value **argv);
330
+ static int csvtabNext(sqlite3_vtab_cursor*);
331
+ static int csvtabEof(sqlite3_vtab_cursor*);
332
+ static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
333
+ static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
334
+
335
+ /* An instance of the CSV virtual table */
336
+ typedef struct CsvTable {
337
+ sqlite3_vtab base; /* Base class. Must be first */
338
+ char *zFilename; /* Name of the CSV file */
339
+ char *zData; /* Raw CSV data in lieu of zFilename */
340
+ long iStart; /* Offset to start of data in zFilename */
341
+ int nCol; /* Number of columns in the CSV file */
342
+ unsigned int tstFlags; /* Bit values used for testing */
343
+ } CsvTable;
344
+
345
+ /* Allowed values for tstFlags */
346
+ #define CSVTEST_FIDX 0x0001 /* Pretend that constrained searches cost less */
347
+
348
+ /* A cursor for the CSV virtual table */
349
+ typedef struct CsvCursor {
350
+ sqlite3_vtab_cursor base; /* Base class. Must be first */
351
+ CsvReader rdr; /* The CsvReader object */
352
+ char **azVal; /* Value of the current row */
353
+ int *aLen; /* Length of each entry */
354
+ sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */
355
+ } CsvCursor;
356
+
357
+ /* Transfer error message text from a reader into a CsvTable */
358
+ static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
359
+ sqlite3_free(pTab->base.zErrMsg);
360
+ pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
361
+ }
362
+
363
+ /*
364
+ ** This method is the destructor fo a CsvTable object.
365
+ */
366
+ static int csvtabDisconnect(sqlite3_vtab *pVtab){
367
+ CsvTable *p = (CsvTable*)pVtab;
368
+ sqlite3_free(p->zFilename);
369
+ sqlite3_free(p->zData);
370
+ sqlite3_free(p);
371
+ return SQLITE_OK;
372
+ }
373
+
374
/* Advance past leading whitespace, as classified by isspace().  Return a
** pointer to the first non-whitespace character, or to the zero
** terminator if the string holds nothing but whitespace. */
static const char *csv_skip_whitespace(const char *z){
  const unsigned char *zCur = (const unsigned char*)z;
  while( isspace(*zCur) ){
    zCur++;
  }
  return (const char*)zCur;
}
380
+
381
/* Remove trailing whitespace from the end of string z[], in place.
**
** n is the length of the text kept so far, so the character to examine
** is z[n-1].  Examining z[n] would test the zero terminator, which is
** never whitespace, and nothing would ever be trimmed.
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
387
+
388
/* Dequote the string in place.
**
** Nothing happens unless z[] begins with ' or ", is at least two bytes
** long, and ends with the same quote character.  Otherwise the outer
** quotes are removed and each doubled quote inside collapses to one.
*/
static void csv_dequote(char *z){
  const char quote = z[0];
  size_t len;
  size_t src;
  size_t dst = 0;

  if( quote!='\'' && quote!='"' ) return;
  len = strlen(z);
  if( len<2 || z[len-1]!=quote ) return;

  src = 1;
  while( src<len-1 ){
    /* Skip the first quote of a doubled pair; the second is copied */
    if( z[src]==quote && z[src+1]==quote ) src++;
    z[dst++] = z[src++];
  }
  z[dst] = 0;
}
403
+
404
/* Test whether z has the form "TAG = VALUE", where whitespace is allowed
** before TAG and on either side of the "=".  The first nTag bytes of
** zTag are compared against the input.  On a match return a pointer to
** the first character of VALUE; otherwise return NULL.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zRest = csv_skip_whitespace(z);
  if( strncmp(zTag, zRest, nTag) ) return 0;
  zRest = csv_skip_whitespace(zRest + nTag);
  return zRest[0]=='=' ? csv_skip_whitespace(zRest+1) : 0;
}
415
+
416
+ /* Decode a parameter that requires a dequoted string.
417
+ **
418
+ ** Return 1 if the parameter is seen, or 0 if not. 1 is returned
419
+ ** even if there is an error. If an error occurs, then an error message
420
+ ** is left in p->zErr. If there are no errors, p->zErr[0]==0.
421
+ */
422
+ static int csv_string_parameter(
423
+ CsvReader *p, /* Leave the error message here, if there is one */
424
+ const char *zParam, /* Parameter we are checking for */
425
+ const char *zArg, /* Raw text of the virtual table argment */
426
+ char **pzVal /* Write the dequoted string value here */
427
+ ){
428
+ const char *zValue;
429
+ zValue = csv_parameter(zParam,(int)strlen(zParam),zArg);
430
+ if( zValue==0 ) return 0;
431
+ p->zErr[0] = 0;
432
+ if( *pzVal ){
433
+ csv_errmsg(p, "more than one '%s' parameter", zParam);
434
+ return 1;
435
+ }
436
+ *pzVal = sqlite3_mprintf("%s", zValue);
437
+ if( *pzVal==0 ){
438
+ csv_errmsg(p, "out of memory");
439
+ return 1;
440
+ }
441
+ csv_trim_whitespace(*pzVal);
442
+ csv_dequote(*pzVal);
443
+ return 1;
444
+ }
445
+
446
+
447
/* Interpret z as a boolean.  Return 1 for "yes", "on", "true" or "1",
** and 0 for "no", "off", "false" or "0".  The words are compared with
** sqlite3_stricmp(); the digits must match exactly.  Return -1 if z is
** none of these.
*/
static int csv_boolean(const char *z){
  static const char *const azTrue[] = { "yes", "on", "true" };
  static const char *const azFalse[] = { "no", "off", "false" };
  size_t i;

  for(i=0; i<sizeof(azTrue)/sizeof(azTrue[0]); i++){
    if( sqlite3_stricmp(azTrue[i], z)==0 ) return 1;
  }
  if( z[0]=='1' && z[1]==0 ) return 1;

  for(i=0; i<sizeof(azFalse)/sizeof(azFalse[0]); i++){
    if( sqlite3_stricmp(azFalse[i], z)==0 ) return 0;
  }
  if( z[0]=='0' && z[1]==0 ) return 0;

  return -1;
}
467
+
468
/* Test whether z has the form "TAG = BOOLEAN" or is just "TAG".
**
** A bare TAG means true.  On a match, store the boolean in *pValue and
** return non-zero.  Return zero, leaving *pValue untouched, if the input
** does not begin with TAG, if TAG is followed by anything other than
** "=", or if the text after "=" is not recognized by csv_boolean().
*/
static int csv_boolean_parameter(
  const char *zTag,       /* Tag we are looking for */
  int nTag,               /* Size of the tag in bytes */
  const char *z,          /* Input parameter */
  int *pValue             /* Write boolean value here */
){
  int bParsed;
  const char *zCur = csv_skip_whitespace(z);

  if( strncmp(zTag, zCur, nTag)!=0 ) return 0;
  zCur = csv_skip_whitespace(zCur + nTag);
  if( zCur[0]==0 ){
    *pValue = 1;
    return 1;
  }
  if( zCur[0]!='=' ) return 0;

  bParsed = csv_boolean(csv_skip_whitespace(zCur+1));
  if( bParsed<0 ) return 0;
  *pValue = bParsed;
  return 1;
}
496
+
497
+ /*
498
+ ** Parameters:
499
+ ** filename=FILENAME Name of file containing CSV content
500
+ ** data=TEXT Direct CSV content.
501
+ ** schema=SCHEMA Alternative CSV schema.
502
+ ** header=YES|NO First row of CSV defines the names of
503
+ ** columns if "yes". Default "no".
504
+ ** columns=N Assume the CSV file contains N columns.
505
+ ** max_columns=N Error out if we encounter more cols than this
506
+ **
507
+ ** Only available if compiled with SQLITE_TEST:
508
+ **
509
+ ** testflags=N Bitmask of test flags. Optional
510
+ **
511
+ ** If schema= is omitted, then the columns are named "c0", "c1", "c2",
512
+ ** and so forth. If columns=N is omitted, then the file is opened and
513
+ ** the number of columns in the first row is counted to determine the
514
+ ** column count. If header=YES, then the first row is skipped.
515
+ */
516
+ static int csvtabConnect(
517
+ sqlite3 *db,
518
+ void *_pAux,
519
+ int argc, const char *const*argv,
520
+ sqlite3_vtab **ppVtab,
521
+ char **pzErr
522
+ ){
523
+ CsvTable *pNew = 0; /* The CsvTable object to construct */
524
+ int bHeader = -1; /* header= flags. -1 means not seen yet */
525
+ int rc = SQLITE_OK; /* Result code from this routine */
526
+ int i, j; /* Loop counters */
527
+ #ifdef SQLITE_TEST
528
+ int tstFlags = 0; /* Value for testflags=N parameter */
529
+ #endif
530
+ int max_columns = 2000; /* default max columns */
531
+ int b; /* Value of a boolean parameter */
532
+ int nCol = -99; /* Value of the columns= parameter */
533
+ CsvReader sRdr; /* A CSV file reader used to store an error
534
+ ** message and/or to count the number of columns */
535
+ static const char *azParam[] = {
536
+ "filename", "data", "schema",
537
+ };
538
+ char *azPValue[3]; /* Parameter values */
539
+ # define CSV_FILENAME (azPValue[0])
540
+ # define CSV_DATA (azPValue[1])
541
+ # define CSV_SCHEMA (azPValue[2])
542
+ (void)(_pAux);
543
+
544
+ assert( sizeof(azPValue)==sizeof(azParam) );
545
+ memset(&sRdr, 0, sizeof(sRdr));
546
+ memset(azPValue, 0, sizeof(azPValue));
547
+ for(i=3; i<argc; i++){
548
+ const char *z = argv[i];
549
+ const char *zValue;
550
+ for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
551
+ if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
552
+ }
553
+ if( j<sizeof(azParam)/sizeof(azParam[0]) ){
554
+ if( sRdr.zErr[0] ) goto csvtab_connect_error;
555
+ }else
556
+ if( csv_boolean_parameter("header",6,z,&b) ){
557
+ if( bHeader>=0 ){
558
+ csv_errmsg(&sRdr, "more than one 'header' parameter");
559
+ goto csvtab_connect_error;
560
+ }
561
+ bHeader = b;
562
+ }else
563
+ #ifdef SQLITE_TEST
564
+ if( (zValue = csv_parameter("testflags",9,z))!=0 ){
565
+ tstFlags = (unsigned int)atoi(zValue);
566
+ }else
567
+ #endif
568
+ if( (zValue = csv_parameter("max_columns",11,z))!=0 ){
569
+ max_columns = atoi(zValue);
570
+ if( max_columns<=0 || max_columns > 2000 ){
571
+ csv_errmsg(&sRdr, "max_columns= value must be > 0 and < 2000");
572
+ goto csvtab_connect_error;
573
+ }
574
+ }else
575
+ if( (zValue = csv_parameter("columns",7,z))!=0 ){
576
+ if( nCol>0 ){
577
+ csv_errmsg(&sRdr, "more than one 'columns' parameter");
578
+ goto csvtab_connect_error;
579
+ }
580
+ nCol = atoi(zValue);
581
+ if( nCol<=0 ){
582
+ csv_errmsg(&sRdr, "column= value must be positive");
583
+ goto csvtab_connect_error;
584
+ }
585
+ }else
586
+ {
587
+ csv_errmsg(&sRdr, "bad parameter: '%s'", z);
588
+ goto csvtab_connect_error;
589
+ }
590
+ }
591
+ if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
592
+ csv_errmsg(&sRdr, "must specify either filename= or data= but not both");
593
+ goto csvtab_connect_error;
594
+ }
595
+
596
+ if( (nCol<=0 || bHeader==1)
597
+ && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA)
598
+ ){
599
+ csv_errmsg(&sRdr, "unable to open csv reader");
600
+ goto csvtab_connect_error;
601
+ }
602
+ pNew = sqlite3_malloc( sizeof(*pNew) );
603
+ *ppVtab = (sqlite3_vtab*)pNew;
604
+ if( pNew==0 ) goto csvtab_connect_oom;
605
+ memset(pNew, 0, sizeof(*pNew));
606
+ if( CSV_SCHEMA==0 ){
607
+ sqlite3_str *pStr = sqlite3_str_new(0);
608
+ char *zSep = "";
609
+ int iCol = 0;
610
+ sqlite3_str_appendf(pStr, "CREATE TABLE x(");
611
+ if( nCol<0 && bHeader<1 ){
612
+ nCol = 0;
613
+ do{
614
+ csv_read_one_field(&sRdr);
615
+ nCol++;
616
+ if(nCol > max_columns) {
617
+ csv_errmsg(&sRdr, "Column limit (%i) exceeded", max_columns);
618
+ goto csvtab_connect_error;
619
+ }
620
+ }while( sRdr.cTerm==',');
621
+ }
622
+ if( nCol>0 && bHeader<1 ){
623
+ for(iCol=0; iCol<nCol; iCol++){
624
+ sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, iCol);
625
+ zSep = ",";
626
+ }
627
+ }else{
628
+ do{
629
+ char *z = csv_read_one_field(&sRdr);
630
+ if( (nCol>0 && iCol<nCol) || (nCol<0 && bHeader) ){
631
+ sqlite3_str_appendf(pStr,"%s\"%w\" TEXT", zSep, z);
632
+ zSep = ",";
633
+ iCol++;
634
+
635
+ if(iCol > max_columns) {
636
+ csv_errmsg(&sRdr, "Column limit (%i) exceeded", max_columns);
637
+ goto csvtab_connect_error;
638
+ }
639
+ }
640
+ }while( sRdr.cTerm==',' );
641
+ if( nCol<0 ){
642
+ nCol = iCol;
643
+ }else{
644
+ while( iCol<nCol ){
645
+ sqlite3_str_appendf(pStr,"%sc%d TEXT", zSep, ++iCol);
646
+ zSep = ",";
647
+ }
648
+ }
649
+ }
650
+ pNew->nCol = nCol;
651
+ sqlite3_str_appendf(pStr, ")");
652
+ CSV_SCHEMA = sqlite3_str_finish(pStr);
653
+ if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
654
+ }else if( nCol<0 ){
655
+ do{
656
+ csv_read_one_field(&sRdr);
657
+ pNew->nCol++;
658
+
659
+ if(pNew->nCol > max_columns) {
660
+ csv_errmsg(&sRdr, "Column limit (%i) exceeded", max_columns);
661
+ goto csvtab_connect_error;
662
+ }
663
+ }while( sRdr.cTerm==',' );
664
+ }else{
665
+ pNew->nCol = nCol;
666
+ }
667
+ pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
668
+ pNew->zData = CSV_DATA; CSV_DATA = 0;
669
+ #ifdef SQLITE_TEST
670
+ pNew->tstFlags = tstFlags;
671
+ #endif
672
+ if( bHeader!=1 ){
673
+ pNew->iStart = 0;
674
+ }else if( pNew->zData ){
675
+ pNew->iStart = (int)sRdr.iIn - sRdr.put;
676
+ }else{
677
+ pNew->iStart = (int)(ftell(sRdr.in) - sRdr.nIn + sRdr.iIn - sRdr.put);
678
+ }
679
+ csv_reader_reset(&sRdr);
680
+ rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
681
+ if( rc ){
682
+ csv_errmsg(&sRdr, "bad schema: '%s' - %s", CSV_SCHEMA, sqlite3_errmsg(db));
683
+ goto csvtab_connect_error;
684
+ }
685
+ for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
686
+ sqlite3_free(azPValue[i]);
687
+ }
688
+ /* Rationale for DIRECTONLY:
689
+ ** An attacker who controls a database schema could use this vtab
690
+ ** to exfiltrate sensitive data from other files in the filesystem.
691
+ ** And, recommended practice is to put all CSV virtual tables in the
692
+ ** TEMP namespace, so they should still be usable from within TEMP
693
+ ** views, so there shouldn't be a serious loss of functionality by
694
+ ** prohibiting the use of this vtab from persistent triggers and views.
695
+ */
696
+ sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
697
+ return SQLITE_OK;
698
+
699
+ csvtab_connect_oom:
700
+ rc = SQLITE_NOMEM;
701
+ csv_errmsg(&sRdr, "out of memory");
702
+
703
+ csvtab_connect_error:
704
+ if( pNew ) csvtabDisconnect(&pNew->base);
705
+ for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
706
+ sqlite3_free(azPValue[i]);
707
+ }
708
+ if( sRdr.zErr[0] ){
709
+ sqlite3_free(*pzErr);
710
+ *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
711
+ }
712
+ csv_reader_reset(&sRdr);
713
+ if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
714
+ return rc;
715
+ }
716
+
717
+ /*
718
+ ** Reset the current row content held by a CsvCursor.
719
+ */
720
+ static void csvtabCursorRowReset(CsvCursor *pCur){
721
+ CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
722
+ int i;
723
+ for(i=0; i<pTab->nCol; i++){
724
+ sqlite3_free(pCur->azVal[i]);
725
+ pCur->azVal[i] = 0;
726
+ pCur->aLen[i] = 0;
727
+ }
728
+ }
729
+
730
+ /*
731
+ ** The xConnect and xCreate methods do the same thing, but they must be
732
+ ** different so that the virtual table is not an eponymous virtual table.
733
+ */
734
+ static int csvtabCreate(
735
+ sqlite3 *db,
736
+ void *pAux,
737
+ int argc, const char *const*argv,
738
+ sqlite3_vtab **ppVtab,
739
+ char **pzErr
740
+ ){
741
+ return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
742
+ }
743
+
744
+ /*
745
+ ** Destructor for a CsvCursor.
746
+ */
747
+ static int csvtabClose(sqlite3_vtab_cursor *cur){
748
+ CsvCursor *pCur = (CsvCursor*)cur;
749
+ csvtabCursorRowReset(pCur);
750
+ csv_reader_reset(&pCur->rdr);
751
+ sqlite3_free(cur);
752
+ return SQLITE_OK;
753
+ }
754
+
755
+ /*
756
+ ** Constructor for a new CsvTable cursor object.
757
+ */
758
+ static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
759
+ CsvTable *pTab = (CsvTable*)p;
760
+ CsvCursor *pCur;
761
+ size_t nByte;
762
+ nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
763
+ pCur = sqlite3_malloc64( nByte );
764
+ if( pCur==0 ) return SQLITE_NOMEM;
765
+ memset(pCur, 0, nByte);
766
+ pCur->azVal = (char**)&pCur[1];
767
+ pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
768
+ *ppCursor = &pCur->base;
769
+ if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
770
+ csv_xfer_error(pTab, &pCur->rdr);
771
+ return SQLITE_ERROR;
772
+ }
773
+ return SQLITE_OK;
774
+ }
775
+
776
+
777
+ /*
778
+ ** Advance a CsvCursor to its next row of input.
779
+ ** Set the EOF marker if we reach the end of input.
780
+ */
781
+ static int csvtabNext(sqlite3_vtab_cursor *cur){
782
+ CsvCursor *pCur = (CsvCursor*)cur;
783
+ CsvTable *pTab = (CsvTable*)cur->pVtab;
784
+ int i = 0;
785
+ char *z;
786
+ do{
787
+ z = csv_read_one_field(&pCur->rdr);
788
+ if( z==0 ){
789
+ break;
790
+ }
791
+ if( i<pTab->nCol ){
792
+ if( pCur->aLen[i] < pCur->rdr.n+1 ){
793
+ char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
794
+ if( zNew==0 ){
795
+ csv_errmsg(&pCur->rdr, "out of memory");
796
+ csv_xfer_error(pTab, &pCur->rdr);
797
+ break;
798
+ }
799
+ pCur->azVal[i] = zNew;
800
+ pCur->aLen[i] = pCur->rdr.n+1;
801
+ }
802
+ memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
803
+ i++;
804
+ }
805
+ }while( pCur->rdr.cTerm==',' );
806
+ if( z==0 || (pCur->rdr.cTerm==EOF && i<pTab->nCol) ){
807
+ pCur->iRowid = -1;
808
+ }else{
809
+ pCur->iRowid++;
810
+ while( i<pTab->nCol ){
811
+ sqlite3_free(pCur->azVal[i]);
812
+ pCur->azVal[i] = 0;
813
+ pCur->aLen[i] = 0;
814
+ i++;
815
+ }
816
+ }
817
+ return SQLITE_OK;
818
+ }
819
+
820
+ /*
821
+ ** Return values of columns for the row at which the CsvCursor
822
+ ** is currently pointing.
823
+ */
824
+ static int csvtabColumn(
825
+ sqlite3_vtab_cursor *cur, /* The cursor */
826
+ sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
827
+ int i /* Which column to return */
828
+ ){
829
+ CsvCursor *pCur = (CsvCursor*)cur;
830
+ CsvTable *pTab = (CsvTable*)cur->pVtab;
831
+ if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
832
+ sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT);
833
+ }
834
+ return SQLITE_OK;
835
+ }
836
+
837
+ /*
838
+ ** Return the rowid for the current row.
839
+ */
840
+ static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
841
+ CsvCursor *pCur = (CsvCursor*)cur;
842
+ *pRowid = pCur->iRowid;
843
+ return SQLITE_OK;
844
+ }
845
+
846
+ /*
847
+ ** Return TRUE if the cursor has been moved off of the last
848
+ ** row of output.
849
+ */
850
+ static int csvtabEof(sqlite3_vtab_cursor *cur){
851
+ CsvCursor *pCur = (CsvCursor*)cur;
852
+ return pCur->iRowid<0;
853
+ }
854
+
855
+ /*
856
+ ** Only a full table scan is supported. So xFilter simply rewinds to
857
+ ** the beginning.
858
+ */
859
+ static int csvtabFilter(
860
+ sqlite3_vtab_cursor *pVtabCursor,
861
+ int idxNum, const char *idxStr,
862
+ int argc, sqlite3_value **argv
863
+ ){
864
+ CsvCursor *pCur = (CsvCursor*)pVtabCursor;
865
+ CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
866
+ pCur->iRowid = 0;
867
+ if( pCur->rdr.in==0 ){
868
+ assert( pCur->rdr.zIn==pTab->zData );
869
+ assert( pTab->iStart>=0 );
870
+ assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
871
+ pCur->rdr.iIn = pTab->iStart;
872
+ }else{
873
+ fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
874
+ pCur->rdr.iIn = 0;
875
+ pCur->rdr.nIn = 0;
876
+ }
877
+ return csvtabNext(pVtabCursor);
878
+ }
879
+
880
+ /*
881
+ ** Only a forward full table scan is supported. xBestIndex is mostly
882
+ ** a no-op. If CSVTEST_FIDX is set, then the presence of equality
883
+ ** constraints lowers the estimated cost, which is fiction, but is useful
884
+ ** for testing certain kinds of virtual table behavior.
885
+ */
886
+ static int csvtabBestIndex(
887
+ sqlite3_vtab *tab,
888
+ sqlite3_index_info *pIdxInfo
889
+ ){
890
+ pIdxInfo->estimatedCost = 1000000;
891
+ #ifdef SQLITE_TEST
892
+ if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
893
+ /* The usual (and sensible) case is to always do a full table scan.
894
+ ** The code in this branch only runs when testflags=1. This code
895
+ ** generates an artifical and unrealistic plan which is useful
896
+ ** for testing virtual table logic but is not helpful to real applications.
897
+ **
898
+ ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
899
+ ** table (even though it is not) and the cost of running the virtual table
900
+ ** is reduced from 1 million to just 10. The constraints are *not* marked
901
+ ** as omittable, however, so the query planner should still generate a
902
+ ** plan that gives a correct answer, even if they plan is not optimal.
903
+ */
904
+ int i;
905
+ int nConst = 0;
906
+ for(i=0; i<pIdxInfo->nConstraint; i++){
907
+ unsigned char op;
908
+ if( pIdxInfo->aConstraint[i].usable==0 ) continue;
909
+ op = pIdxInfo->aConstraint[i].op;
910
+ if( op==SQLITE_INDEX_CONSTRAINT_EQ
911
+ || op==SQLITE_INDEX_CONSTRAINT_LIKE
912
+ || op==SQLITE_INDEX_CONSTRAINT_GLOB
913
+ ){
914
+ pIdxInfo->estimatedCost = 10;
915
+ pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
916
+ nConst++;
917
+ }
918
+ }
919
+ }
920
+ #endif
921
+ return SQLITE_OK;
922
+ }
923
+
924
+ sqlite3_module CsvModule = {
925
+ 0, /* iVersion */
926
+ csvtabCreate, /* xCreate */
927
+ csvtabConnect, /* xConnect */
928
+ csvtabBestIndex, /* xBestIndex */
929
+ csvtabDisconnect, /* xDisconnect */
930
+ csvtabDisconnect, /* xDestroy */
931
+ csvtabOpen, /* xOpen - open a cursor */
932
+ csvtabClose, /* xClose - close a cursor */
933
+ csvtabFilter, /* xFilter - configure scan constraints */
934
+ csvtabNext, /* xNext - advance a cursor */
935
+ csvtabEof, /* xEof - check for end of scan */
936
+ csvtabColumn, /* xColumn - read data */
937
+ csvtabRowid, /* xRowid - read data */
938
+ 0, /* xUpdate */
939
+ 0, /* xBegin */
940
+ 0, /* xSync */
941
+ 0, /* xCommit */
942
+ 0, /* xRollback */
943
+ 0, /* xFindMethod */
944
+ 0, /* xRename */
945
+ };
946
+
947
+ sqlite3_module *get_csv_module() {
948
+ sqlite3_module *m = calloc(1, sizeof(*m));
949
+ *m = CsvModule;
950
+ return m;
951
+ }
952
+
953
#ifdef SQLITE_TEST
/*
** Test-only variant of the CSV virtual table that provides an xUpdate
** method.  The CSV input is not really writable, so xUpdate does
** nothing except return SQLITE_READONLY.
*/
static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
  (void)p;
  (void)n;
  (void)v;
  (void)x;
  return SQLITE_READONLY;
}

/* Same method table as CsvModule except for the xUpdate entry. */
static sqlite3_module CsvModuleFauxWrite = {
  /* iVersion    */ 0,
  /* xCreate     */ csvtabCreate,
  /* xConnect    */ csvtabConnect,
  /* xBestIndex  */ csvtabBestIndex,
  /* xDisconnect */ csvtabDisconnect,
  /* xDestroy    */ csvtabDisconnect,
  /* xOpen       */ csvtabOpen,       /* open a cursor */
  /* xClose      */ csvtabClose,      /* close a cursor */
  /* xFilter     */ csvtabFilter,     /* configure scan constraints */
  /* xNext       */ csvtabNext,       /* advance a cursor */
  /* xEof        */ csvtabEof,        /* check for end of scan */
  /* xColumn     */ csvtabColumn,     /* read data */
  /* xRowid      */ csvtabRowid,      /* read data */
  /* xUpdate     */ csvtabUpdate,
  /* xBegin      */ 0,
  /* xSync       */ 0,
  /* xCommit     */ 0,
  /* xRollback   */ 0,
  /* xFindMethod */ 0,
  /* xRename     */ 0,
};
#endif /* SQLITE_TEST */
985
+
986
+ #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
987
+
988
+
989
+ #ifdef _WIN32
990
+ __declspec(dllexport)
991
+ #endif
992
+ /*
993
+ ** This routine is called when the extension is loaded. The new
994
+ ** CSV virtual table module is registered with the calling database
995
+ ** connection.
996
+ */
997
+ int sqlite3_csv_init(
998
+ sqlite3 *db,
999
+ char **pzErrMsg,
1000
+ const sqlite3_api_routines *pApi
1001
+ ){
1002
+ #ifndef SQLITE_OMIT_VIRTUALTABLE
1003
+ int rc;
1004
+ SQLITE_EXTENSION_INIT2(pApi);
1005
+ rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
1006
+ #ifdef SQLITE_TEST
1007
+ if( rc==SQLITE_OK ){
1008
+ rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
1009
+ }
1010
+ #endif
1011
+ return rc;
1012
+ #else
1013
+ return SQLITE_OK;
1014
+ #endif
1015
+ }