duckdb-csv 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: fe118ef9fca3465343ead38eb1a09c7909ee2faede34067e183831289553c047
4
+ data.tar.gz: 50c877080de99c76ccdc749bb8517368ad194f73fdeb99fb0c856ea2791c3824
5
+ SHA512:
6
+ metadata.gz: 3bdaa5ba0ca98fd0c08255de81ca3daf48267f5d5c3c16ee61b2c6ce89a93e869a95e92c2e80a89d72de91d663724aba9eeb3355a3006bd62cc95085481fbb83
7
+ data.tar.gz: 25ed49181fa323f786c6290113d3040900fe833773d1fa1634a11cf6568c9b2ce5efbfa7a0a8c358e8500165b6ead1d79a34468d552261e5608fc33121fd1011
@@ -0,0 +1,33 @@
1
+ # Copilot Instructions
2
+
3
+ ## Commands
4
+
5
+ ```sh
6
+ bundle exec rake test # run all tests
7
+ bundle exec ruby -Ilib -Itest test/duckdb/csv/table_adapter_test.rb # run single test file
8
+ bundle exec ruby -Ilib -Itest test/duckdb/csv/table_adapter_test.rb -n test_s_register! # run single test by name
9
+ bundle exec rubocop # lint
10
+ ```
11
+
12
+ ## Architecture
13
+
14
+ This gem (`duckdb-csv`) implements a single-class CSV table adapter for [ruby-duckdb](https://github.com/suketa/ruby-duckdb). The sole public API is `DuckDB::CSV::TableAdapter`.
15
+
16
+ **Flow:**
17
+ 1. `TableAdapter.register!` — registers the adapter once globally via `DuckDB::TableFunction.add_table_adapter(::CSV, instance)`
18
+ 2. `con.expose_as_table(csv, 'name', columns: {...})` — exposes a `CSV` object as a named DuckDB table function
19
+ 3. DuckDB calls the adapter's `call` method to obtain a `DuckDB::TableFunction`, passing a block that yields one row at a time via `write_row`
20
+ 4. On exhaustion, `csv.rewind` is called so the table can be re-queried
21
+
22
+ **Column inference:**
23
+ - With headers → columns named after CSV headers, all typed `:varchar`
24
+ - Without headers → columns named `col1`, `col2`, … all typed `:varchar`
25
+ - Explicit `columns:` hash overrides inference with typed columns (`:integer`, `:float`, `:date`, `:timestamp`, `:varchar`)
26
+
27
+ ## Key Conventions
28
+
29
+ - **Thread safety**: DuckDB's `TableFunction` is not thread-safe; tests and README examples always call `con.execute('SET threads=1')` before querying a CSV table.
30
+ - **Type casting**: Field values are cast via `DuckDB.cast(field, logical_type)` from the output vector's logical type — no manual type conversion in adapter code.
31
+ - **`frozen_string_literal: true`** is set in every file.
32
+ - Tests use Minitest and live under `test/duckdb/csv/`. Test class names match file names (`TableAdapterTest` in `table_adapter_test.rb`).
33
+ - RuboCop is configured with `rubocop-minitest` and `rubocop-rake` plugins; line length max is 120.
@@ -0,0 +1,40 @@
1
+ name: Linter
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types:
9
+ - opened
10
+ - synchronize
11
+ - reopened
12
+
13
+ concurrency:
14
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
15
+ cancel-in-progress: true
16
+
17
+ jobs:
18
+ rubocop:  # lint job: installs the bundle and runs RuboCop on Ubuntu
19
+ runs-on: ubuntu-latest
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+
23
+ - name: Download duckdb  # unpacks the libduckdb v1.4.4 release zip into duckdb-v1.4.4/
24
+ run: |
25
+ curl -sLO https://github.com/duckdb/duckdb/releases/download/v1.4.4/libduckdb-linux-amd64.zip
26
+ mkdir -p duckdb-v1.4.4
27
+ unzip libduckdb-linux-amd64.zip -d duckdb-v1.4.4
28
+
29
+ - name: Set up Ruby
30
+ uses: ruby/setup-ruby@v1
31
+ with:
32
+ ruby-version: '3.3'
33
+
34
+ - name: bundle install  # points the duckdb gem build at the directory unpacked above
35
+ run: |
36
+ bundle config build.duckdb "--with-duckdb-include=${GITHUB_WORKSPACE}/duckdb-v1.4.4 --with-duckdb-lib=${GITHUB_WORKSPACE}/duckdb-v1.4.4"
37
+ bundle install --jobs 4 --retry 3
38
+
39
+ - name: Run RuboCop
40
+ run: bundle exec rubocop
@@ -0,0 +1,76 @@
1
+ name: Test
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types:
9
+ - opened
10
+ - synchronize
11
+ - reopened
12
+
13
+ concurrency:
14
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
15
+ cancel-in-progress: true
16
+
17
+ jobs:
18
+ test:  # runs the test suite once per OS x Ruby x DuckDB combination of the matrix
19
+ runs-on: ${{ matrix.os }}
20
+ timeout-minutes: 30
21
+ strategy:
22
+ matrix:
23
+ os: [ubuntu-latest, macos-latest]
24
+ ruby: ['3.2', '3.3', '3.4']
25
+ duckdb: ['1.4.4']
26
+ include:  # attaches the DuckDB release asset name that matches each OS
27
+ - os: ubuntu-latest
28
+ duckdb_asset: libduckdb-linux-amd64.zip
29
+ - os: macos-latest
30
+ duckdb_asset: libduckdb-osx-universal.zip
31
+
32
+ steps:
33
+ - uses: actions/checkout@v4
34
+
35
+ - name: duckdb cache  # caches the unpacked DuckDB directory, keyed by runner OS and DuckDB version
36
+ id: duckdb-cache
37
+ uses: actions/cache@v4
38
+ with:
39
+ path: duckdb-v${{ matrix.duckdb }}
40
+ key: ${{ runner.os }}-duckdb-v${{ matrix.duckdb }}
41
+
42
+ - name: Download duckdb ${{ matrix.duckdb }}
43
+ if: steps.duckdb-cache.outputs.cache-hit != 'true'  # download only on a cache miss
44
+ run: |
45
+ curl -sLO https://github.com/duckdb/duckdb/releases/download/v${{ matrix.duckdb }}/${{ matrix.duckdb_asset }}
46
+ mkdir -p duckdb-v${{ matrix.duckdb }}
47
+ unzip ${{ matrix.duckdb_asset }} -d duckdb-v${{ matrix.duckdb }}
48
+
49
+ - name: Set up Ruby ${{ matrix.ruby }}
50
+ uses: ruby/setup-ruby@v1
51
+ with:
52
+ ruby-version: ${{ matrix.ruby }}
53
+
54
+ - name: bundle install  # points the duckdb gem build at the unpacked DuckDB directory
55
+ run: |
56
+ bundle config build.duckdb "--with-duckdb-include=${GITHUB_WORKSPACE}/duckdb-v${{ matrix.duckdb }} --with-duckdb-lib=${GITHUB_WORKSPACE}/duckdb-v${{ matrix.duckdb }}"
57
+ bundle install --jobs 4 --retry 3
58
+
59
+ - name: Run tests (Linux)  # prepends the DuckDB directory to LD_LIBRARY_PATH before running rake
60
+ if: runner.os == 'Linux'
61
+ run: |
62
+ export LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/duckdb-v${{ matrix.duckdb }}:${LD_LIBRARY_PATH}
63
+ bundle exec rake test
64
+
65
+ - name: Run tests (macOS)  # same as the Linux step, but through DYLD_LIBRARY_PATH
66
+ if: runner.os == 'macOS'
67
+ run: |
68
+ export DYLD_LIBRARY_PATH=${GITHUB_WORKSPACE}/duckdb-v${{ matrix.duckdb }}:${DYLD_LIBRARY_PATH}
69
+ bundle exec rake test
70
+   post-test:
71
+     name: All tests passed
72
+     runs-on: ubuntu-latest
73
+     needs: test
74
+     if: always()  # run even when `test` fails; a skipped gate job would not block merging
75
+     steps:
76
+       - run: test "${{ needs.test.result }}" = "success"  # fail unless every matrix job succeeded
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ *.a
10
+ *.bak
11
+ *.bundle
12
+ *.dll
13
+ *.log
14
+ *.o
15
+ *.obj
16
+ *.so
17
+ Makefile
18
+ ng_test
19
+ /docs/
20
+
data/.rubocop.yml ADDED
@@ -0,0 +1,32 @@
1
+ plugins:
2
+ - rubocop-minitest
3
+ - rubocop-rake
4
+
5
+ AllCops:
6
+ NewCops: enable
7
+ TargetRubyVersion: 3.2
8
+ Exclude:
9
+ - 'vendor/**/*'
10
+ - 'tmp/**/*'
11
+ - 'pkg/**/*'
12
+
13
+ Metrics/BlockLength:
14
+   Exclude:
15
+     - 'test/**/*'
16
+     - 'duckdb-csv.gemspec'  # this gem's gemspec; its Gem::Specification block is long by nature
17
+
18
+ Metrics/ClassLength:
19
+ Exclude:
20
+ - 'test/**/*_test.rb'
21
+ - 'lib/duckdb/prepared_statement.rb'
22
+ - 'lib/duckdb/appender.rb'
23
+
24
+ Metrics/ModuleLength:
25
+ Exclude:
26
+ - 'lib/duckdb/converter.rb'
27
+
28
+ Style/Documentation:
29
+ Enabled: false
30
+
31
+ Layout/LineLength:
32
+ Max: 120
data/CHANGELOG.md ADDED
@@ -0,0 +1,7 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## Unreleased
6
+ - add DuckDB::CSV::TableAdapter
7
+
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ gemspec
6
+
7
+ gem 'minitest', '~> 6.0'
8
+ gem 'rake', '~> 13.0'
9
+ gem 'rubocop', require: false
10
+ gem 'rubocop-minitest', require: false
11
+ gem 'rubocop-rake', require: false
data/Gemfile.lock ADDED
@@ -0,0 +1,110 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ duckdb-csv (0.0.1)
5
+ csv (>= 3.2.0)
6
+ duckdb (>= 1.4.4.0)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ addressable (2.8.9)
12
+ public_suffix (>= 2.0.2, < 8.0)
13
+ ast (2.4.3)
14
+ bigdecimal (4.0.1)
15
+ csv (3.3.5)
16
+ drb (2.2.3)
17
+ duckdb (1.4.4.0)
18
+ bigdecimal (>= 3.1.4)
19
+ json (2.19.0)
20
+ json-schema (6.2.0)
21
+ addressable (~> 2.8)
22
+ bigdecimal (>= 3.1, < 5)
23
+ language_server-protocol (3.17.0.5)
24
+ lint_roller (1.1.0)
25
+ mcp (0.8.0)
26
+ json-schema (>= 4.1)
27
+ minitest (6.0.2)
28
+ drb (~> 2.0)
29
+ prism (~> 1.5)
30
+ parallel (1.27.0)
31
+ parser (3.3.10.2)
32
+ ast (~> 2.4.1)
33
+ racc
34
+ prism (1.9.0)
35
+ public_suffix (7.0.5)
36
+ racc (1.8.1)
37
+ rainbow (3.1.1)
38
+ rake (13.3.1)
39
+ regexp_parser (2.11.3)
40
+ rubocop (1.85.1)
41
+ json (~> 2.3)
42
+ language_server-protocol (~> 3.17.0.2)
43
+ lint_roller (~> 1.1.0)
44
+ mcp (~> 0.6)
45
+ parallel (~> 1.10)
46
+ parser (>= 3.3.0.2)
47
+ rainbow (>= 2.2.2, < 4.0)
48
+ regexp_parser (>= 2.9.3, < 3.0)
49
+ rubocop-ast (>= 1.49.0, < 2.0)
50
+ ruby-progressbar (~> 1.7)
51
+ unicode-display_width (>= 2.4.0, < 4.0)
52
+ rubocop-ast (1.49.0)
53
+ parser (>= 3.3.7.2)
54
+ prism (~> 1.7)
55
+ rubocop-minitest (0.39.1)
56
+ lint_roller (~> 1.1)
57
+ rubocop (>= 1.75.0, < 2.0)
58
+ rubocop-ast (>= 1.38.0, < 2.0)
59
+ rubocop-rake (0.7.1)
60
+ lint_roller (~> 1.1)
61
+ rubocop (>= 1.72.1)
62
+ ruby-progressbar (1.13.0)
63
+ unicode-display_width (3.2.0)
64
+ unicode-emoji (~> 4.1)
65
+ unicode-emoji (4.2.0)
66
+
67
+ PLATFORMS
68
+ ruby
69
+ x86_64-linux
70
+
71
+ DEPENDENCIES
72
+ duckdb-csv!
73
+ minitest (~> 6.0)
74
+ rake (~> 13.0)
75
+ rubocop
76
+ rubocop-minitest
77
+ rubocop-rake
78
+
79
+ CHECKSUMS
80
+ addressable (2.8.9) sha256=cc154fcbe689711808a43601dee7b980238ce54368d23e127421753e46895485
81
+ ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
82
+ bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
83
+ csv (3.3.5) sha256=6e5134ac3383ef728b7f02725d9872934f523cb40b961479f69cf3afa6c8e73f
84
+ drb (2.2.3) sha256=0b00d6fdb50995fe4a45dea13663493c841112e4068656854646f418fda13373
85
+ duckdb (1.4.4.0) sha256=11ef69d0987eb237bfa2ab295301c437a55ff8d014d92abb510e757c9193b51c
86
+ duckdb-csv (0.0.1)
87
+ json (2.19.0) sha256=bc5202f083618b3af7aba3184146ec9d820f8f6de261838b577173475e499d9a
88
+ json-schema (6.2.0) sha256=e8bff46ed845a22c1ab2bd0d7eccf831c01fe23bb3920caa4c74db4306813666
89
+ language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
90
+ lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
91
+ mcp (0.8.0) sha256=ae8bd146bb8e168852866fd26f805f52744f6326afb3211e073f78a95e0c34fb
92
+ minitest (6.0.2) sha256=db6e57956f6ecc6134683b4c87467d6dd792323c7f0eea7b93f66bd284adbc3d
93
+ parallel (1.27.0) sha256=4ac151e1806b755fb4e2dc2332cbf0e54f2e24ba821ff2d3dcf86bf6dc4ae130
94
+ parser (3.3.10.2) sha256=6f60c84aa4bdcedb6d1a2434b738fe8a8136807b6adc8f7f53b97da9bc4e9357
95
+ prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
96
+ public_suffix (7.0.5) sha256=1a8bb08f1bbea19228d3bed6e5ed908d1cb4f7c2726d18bd9cadf60bc676f623
97
+ racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
98
+ rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
99
+ rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
100
+ regexp_parser (2.11.3) sha256=ca13f381a173b7a93450e53459075c9b76a10433caadcb2f1180f2c741fc55a4
101
+ rubocop (1.85.1) sha256=3dbcf9e961baa4c376eeeb2a03913dca5e3987033b04d38fa538aa1e7406cc77
102
+ rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
103
+ rubocop-minitest (0.39.1) sha256=998398d6da4026d297f0f9bf709a1eac5f2b6947c24431f94af08138510cf7ed
104
+ rubocop-rake (0.7.1) sha256=3797f2b6810c3e9df7376c26d5f44f3475eda59eb1adc38e6f62ecf027cbae4d
105
+ ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
106
+ unicode-display_width (3.2.0) sha256=0cdd96b5681a5949cdbc2c55e7b420facae74c4aaf9a9815eee1087cb1853c42
107
+ unicode-emoji (4.2.0) sha256=519e69150f75652e40bf736106cfbc8f0f73aa3fb6a65afe62fefa7f80b0f80f
108
+
109
+ BUNDLED WITH
110
+ 4.0.4
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 suketa
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,120 @@
1
+ # ruby-duckdb-csv
2
+
3
+ ## Description
4
+
5
+ This gem `duckdb-csv` provides a CSV table adapter for [ruby-duckdb](https://github.com/suketa/ruby-duckdb).
6
+ You can query Ruby's `CSV` objects using SQL through DuckDB by using this gem.
7
+ This gem is a sample implementation of a `DuckDB::TableFunction`.
8
+
9
+ ## Requirements
10
+
11
+ - Ruby
12
+ - [duckdb](https://github.com/suketa/ruby-duckdb)
13
+ - [csv](https://github.com/ruby/csv)
14
+
15
+ ## How to install
16
+
17
+ ```sh
18
+ gem install duckdb-csv
19
+ ```
20
+
21
+ Or add the following line to your `Gemfile`:
22
+
23
+ ```ruby
24
+ gem 'duckdb-csv'
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ### Basic usage
30
+
31
+ ```ruby
32
+ require 'duckdb'
33
+ require 'duckdb/csv/table_adapter'
34
+
35
+ db = DuckDB::Database.open
36
+ con = db.connect
37
+
38
+ csv_io = StringIO.new("id,name,age\n1,Alice,30\n2,Bob,25\n3,Charlie,35")
39
+ csv = CSV.new(csv_io, headers: true)
40
+
41
+ DuckDB::CSV::TableAdapter.register!
42
+
43
+ con.execute('SET threads=1') # currently the CSV table adapter is not thread-safe, so we set threads to 1
44
+ con.expose_as_table(csv, 'csv_table')
45
+ result = con.query('SELECT * FROM csv_table()')
46
+ result.each do |row|
47
+ puts row.inspect
48
+ end
49
+ # => ["1", "Alice", "30"]
50
+ # => ["2", "Bob", "25"]
51
+ # => ["3", "Charlie", "35"]
52
+ ```
53
+
54
+ ### Specifying column types
55
+
56
+ By default, all columns are treated as `VARCHAR`. You can specify column types using the `columns` option.
57
+
58
+ ```ruby
59
+ require 'duckdb'
60
+ require 'duckdb/csv/table_adapter'
61
+
62
+ db = DuckDB::Database.open
63
+ con = db.connect
64
+
65
+ csv_io = StringIO.new(<<~CSV.strip)
66
+ id,name,age,height,birthday,created_at
67
+ 1,Alice,30,1.65,1990-01-02,2023-01-01T10:11:12
68
+ 2,Bob,25,1.80,1995-05-15,2024-02-03T11:12:13
69
+ 3,Charlie,35,1.75,1985-10-30,2025-04-05T12:13:14
70
+ CSV
71
+ csv = CSV.new(csv_io, headers: true)
72
+
73
+ DuckDB::CSV::TableAdapter.register!
74
+
75
+ con.execute('SET threads=1') # currently the CSV table adapter is not thread-safe, so we set threads to 1
76
+ con.expose_as_table(
77
+ csv, 'csv_table',
78
+ columns: {
79
+ 'id' => :integer,
80
+ 'name' => :varchar,
81
+ 'age' => :integer,
82
+ 'height' => :float,
83
+ 'birthday' => :date,
84
+ 'created_at' => :timestamp
85
+ }
86
+ )
87
+
88
+ result = con.query('SELECT * FROM csv_table()')
89
+ result.each do |row|
90
+ puts row.inspect
91
+ end
92
+ # => [1, "Alice", 30, 1.65, #<Date: 1990-01-02>, 2023-01-01 10:11:12 +0900]
93
+ # => [2, "Bob", 25, 1.80, #<Date: 1995-05-15>, 2024-02-03 11:12:13 +0900]
94
+ # => [3, "Charlie", 35, 1.75, #<Date: 1985-10-30>, 2025-04-05 12:13:14 +0900]
95
+ ```
96
+
97
+ ### Headerless CSV
98
+
99
+ When the CSV has no headers, columns are automatically named `col1`, `col2`, `col3`, etc.
100
+
101
+ ```ruby
102
+ csv_io = StringIO.new("1,Alice,30\n2,Bob,25\n3,Charlie,35")
103
+ csv = CSV.new(csv_io, headers: false)
104
+
105
+ DuckDB::CSV::TableAdapter.register!
106
+
107
+ con.execute('SET threads=1') # currently the CSV table adapter is not thread-safe, so we set threads to 1
108
+ con.expose_as_table(csv, 'csv_table')
109
+ result = con.query('SELECT col1, col2, col3 FROM csv_table()')
110
+ result.each do |row|
111
+ puts row.inspect
112
+ end
113
+ # => ["1", "Alice", "30"]
114
+ # => ["2", "Bob", "25"]
115
+ # => ["3", "Charlie", "35"]
116
+ ```
117
+
118
+ ## License
119
+
120
+ [MIT License](LICENSE).
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rake/testtask'
5
+
6
+ test_config = lambda do |t|
7
+ t.libs << 'test'
8
+ t.libs << 'lib'
9
+ t.test_files = FileList['test/**/*_test.rb']
10
+ end
11
+
12
+ Rake::TestTask.new(:test, &test_config)
13
+ task default: %i[clobber test]
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'duckdb/csv/version'
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = 'duckdb-csv'
9
+ spec.version = DuckDB::CSV::VERSION
10
+ spec.authors = ['Masaki Suketa']
11
+ spec.email = ['masaki.suketa@nifty.ne.jp']
12
+
13
+ spec.summary = 'This module provides CSV table adapter for duckdb.'
14
+ spec.description = 'This module provides CSV table adapter for duckdb. ' \
15
+ 'You can access CSV like as duckdb table by using this module.'
16
+ spec.homepage = 'https://github.com/suketa/ruby-duckdb-csv'
17
+ spec.license = 'MIT'
18
+
19
+ spec.metadata['rubygems_mfa_required'] = 'true'
20
+ spec.metadata['homepage_uri'] = spec.homepage
21
+ spec.metadata['source_code_uri'] = 'https://github.com/suketa/ruby-duckdb-csv'
22
+ spec.metadata['changelog_uri'] = 'https://github.com/suketa/ruby-duckdb-csv/blob/master/CHANGELOG.md'
23
+
24
+ # Specify which files should be added to the gem when it is released.
25
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
26
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
27
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
28
+ end
29
+ spec.require_paths = ['lib']
30
+ spec.required_ruby_version = '>= 3.2.0'
31
+ spec.add_dependency 'csv', '>= 3.2.0'
32
+ spec.add_dependency 'duckdb', '>= 1.4.4.0'
33
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'duckdb'
4
+ require 'csv'
5
+ require_relative 'version'
6
+
7
+ module DuckDB
8
+ module CSV
9
+ class TableAdapter
10
+ class << self
11
+ def register!
12
+ DuckDB::TableFunction.add_table_adapter(::CSV, new)
13
+ end
14
+ end
15
+
16
+ def call(csv, name, columns: nil)
17
+ columns ||= infer_columns(csv)
18
+ DuckDB::TableFunction.create(name:, columns:) do |_func_info, output|
19
+ write_row(csv, output)
20
+ end
21
+ end
22
+
23
+ private
24
+
25
+ def write_row(csv, output)
26
+ row = csv.readline
27
+
28
+ if row.nil?
29
+ csv.rewind
30
+ return 0
31
+ end
32
+
33
+ write_fields(extract_fields(row), output)
34
+ 1
35
+ end
36
+
37
+ def extract_fields(row)
38
+ row.is_a?(::CSV::Row) ? row.fields : row
39
+ end
40
+
41
+ def write_fields(fields, output)
42
+ fields.each_with_index do |field, index|
43
+ type = output.get_vector(index).logical_type
44
+ field = DuckDB.cast(field, type)
45
+ output.set_value(index, 0, field)
46
+ end
47
+ end
48
+
49
+ def infer_columns(csv)
50
+ columns = csv.headers ? headers_to_columns(csv) : create_columns_from_first_row(csv)
51
+ csv.rewind
52
+ columns
53
+ end
54
+
55
+ def headers_to_columns(csv)
56
+ csv.first.headers.to_h { |header| [header, :varchar] }
57
+ end
58
+
59
+ def create_columns_from_first_row(csv)
60
+ first_row = csv.first
61
+ first_row.size.times.with_object({}) do |i, columns|
62
+ columns["col#{i + 1}"] = :varchar
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DuckDB
4
+ module CSV
5
+ # Version of the duckdb-csv gem; duckdb-csv.gemspec reads this constant to set spec.version.
6
+ VERSION = '0.0.1'
7
+ end
8
+ end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: duckdb-csv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Masaki Suketa
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: csv
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: 3.2.0
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: 3.2.0
26
+ - !ruby/object:Gem::Dependency
27
+ name: duckdb
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.4.4.0
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: 1.4.4.0
40
+ description: This module provides CSV table adapter for duckdb. You can access CSV
41
+ like as duckdb table by using this module.
42
+ email:
43
+ - masaki.suketa@nifty.ne.jp
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".github/copilot-instructions.md"
49
+ - ".github/workflows/linter.yml"
50
+ - ".github/workflows/test.yml"
51
+ - ".gitignore"
52
+ - ".rubocop.yml"
53
+ - CHANGELOG.md
54
+ - Gemfile
55
+ - Gemfile.lock
56
+ - LICENSE
57
+ - README.md
58
+ - Rakefile
59
+ - duckdb-csv.gemspec
60
+ - lib/duckdb/csv/table_adapter.rb
61
+ - lib/duckdb/csv/version.rb
62
+ homepage: https://github.com/suketa/ruby-duckdb-csv
63
+ licenses:
64
+ - MIT
65
+ metadata:
66
+ rubygems_mfa_required: 'true'
67
+ homepage_uri: https://github.com/suketa/ruby-duckdb-csv
68
+ source_code_uri: https://github.com/suketa/ruby-duckdb-csv
69
+ changelog_uri: https://github.com/suketa/ruby-duckdb-csv/blob/master/CHANGELOG.md
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: 3.2.0
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubygems_version: 4.0.3
85
+ specification_version: 4
86
+ summary: This module provides CSV table adapter for duckdb.
87
+ test_files: []