smarter_csv 1.14.4 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rvmrc +1 -1
- data/CHANGELOG.md +113 -0
- data/CONTRIBUTORS.md +2 -0
- data/README.md +16 -0
- data/docs/basic_read_api.md +3 -2
- data/docs/batch_processing.md +15 -3
- data/docs/examples.md +4 -2
- data/docs/header_transformations.md +11 -0
- data/docs/header_validations.md +14 -0
- data/ext/smarter_csv/Makefile +273 -0
- data/ext/smarter_csv/extconf.rb +4 -2
- data/ext/smarter_csv/smarter_csv.bundle +0 -0
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Info.plist +20 -0
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/DWARF/smarter_csv.bundle +0 -0
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/Relocations/aarch64/smarter_csv.bundle.yml +5 -0
- data/ext/smarter_csv/smarter_csv.c +427 -10
- data/ext/smarter_csv/smarter_csv.o +0 -0
- data/lib/smarter_csv/auto_detection.rb +6 -5
- data/lib/smarter_csv/errors.rb +18 -2
- data/lib/smarter_csv/file_io.rb +5 -3
- data/lib/smarter_csv/hash_transformations.rb +75 -53
- data/lib/smarter_csv/header_validations.rb +2 -2
- data/lib/smarter_csv/headers.rb +1 -1
- data/lib/smarter_csv/parser.rb +60 -2
- data/lib/smarter_csv/reader.rb +31 -28
- data/lib/smarter_csv/version.rb +1 -1
- data/smarter_csv.gemspec +0 -1
- metadata +9 -20
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 40ec747f330628f6aebd66fd7478349007cd7d2f049ae5bddbbc3d67cc5f07be
|
|
4
|
+
data.tar.gz: 3d02147aee5983e9fabcd05aed3d0e4ac9da3399d7a231fa2905a5c1f061b9b3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cc825395fc200eca00ff37fb2a8d07d5e05a28c0b9fe2307f5ee5d8cbb7a6286d15856fbdb8b85904e2e685fa29a73a097ed4281068ad0f92833d35a3254fde3
|
|
7
|
+
data.tar.gz: 4f3050d6c33535d4b12e6737c5241708c87408bfb9c0cb320fb89470867bb338cd3359f6bf8ba4cdaa8d6566c1884f23436285ed50d61aef0f4356c772f83fa6
|
data/.rvmrc
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
rvm
|
|
1
|
+
rvm use default@smarter_csv --create
|
data/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,119 @@
|
|
|
1
1
|
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
|
3
3
|
|
|
4
|
+
## 1.15.0 (2026-02-04)
|
|
5
|
+
|
|
6
|
+
* Dropping support for Ruby 2.5
|
|
7
|
+
|
|
8
|
+
* Performance Optimizations
|
|
9
|
+
- 39% less memory allocated
|
|
10
|
+
- 43% fewer objects created
|
|
11
|
+
- ~5× faster at P90 vs SmarterCSV 1.14.4
|
|
12
|
+
- ~3–7× faster at P90 vs Ruby CSV
|
|
13
|
+
|
|
14
|
+
### New Features
|
|
15
|
+
|
|
16
|
+
* **Chunk index in block processing**: When using block-based processing, an optional second parameter `chunk_index` is now passed to the block. This 0-based index is useful for progress tracking and debugging. The change is backwards compatible - existing code continues to work.
|
|
17
|
+
|
|
18
|
+
```ruby
|
|
19
|
+
SmarterCSV.process(file, chunk_size: 100) do |chunk, chunk_index|
|
|
20
|
+
puts "Processing chunk #{chunk_index}..."
|
|
21
|
+
Model.import(chunk)
|
|
22
|
+
end
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### Exception Improvements
|
|
26
|
+
|
|
27
|
+
* `MissingKeys#keys` - programmatic access to missing keys without parsing error messages ([PR #314](https://github.com/tilo/smarter_csv/pull/314), thanks to Skye Shaw)
|
|
28
|
+
* `DuplicateHeaders#headers` - programmatic access to duplicate headers without parsing error messages
|
|
29
|
+
|
|
30
|
+
```ruby
|
|
31
|
+
# Example: accessing missing keys programmatically
|
|
32
|
+
rescue SmarterCSV::MissingKeys => e
|
|
33
|
+
e.keys # => [:employee_id, :department]
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Example: accessing duplicate headers programmatically
|
|
37
|
+
rescue SmarterCSV::DuplicateHeaders => e
|
|
38
|
+
e.headers # => [:email]
|
|
39
|
+
end
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Performance Improvements
|
|
43
|
+
|
|
44
|
+
* **New `parse_line_to_hash_c` function**: Builds Ruby hash directly during parsing, eliminating intermediate array allocations. Previously, parsing created a values array, then `zip()` created pairs array, then `to_h()` built the hash. Now done in a single pass.
|
|
45
|
+
|
|
46
|
+
* **Shared empty string optimization**: Reuses a single frozen empty string for all empty CSV fields, reducing object allocations and GC pressure.
|
|
47
|
+
|
|
48
|
+
* **Faster quote counting**: New `count_quote_chars_c` function replaces Ruby's `each_char` iteration, eliminating one String object allocation per character.
|
|
49
|
+
|
|
50
|
+
* **Conditional nil padding**: Missing columns are only padded with `nil` when `remove_empty_values: false`, avoiding unnecessary work in the default case.
|
|
51
|
+
|
|
52
|
+
### Ruby Code Optimizations
|
|
53
|
+
|
|
54
|
+
* **Frozen regex constants**: Numeric conversion patterns (`FLOAT_REGEX`, `INTEGER_REGEX`, `ZERO_REGEX`) are now pre-compiled and frozen, eliminating millions of regex compilations for large files. This alone reduced numeric conversion overhead from +75% to +4%.
|
|
55
|
+
|
|
56
|
+
* **In-place hash modification**: Hash transformations now modify hashes in-place instead of creating copies, reducing memory allocations by 39% and object count by 43%.
|
|
57
|
+
|
|
58
|
+
### Benchmark Results
|
|
59
|
+
|
|
60
|
+
Benchmarks using Ruby 3.4.7 on M1 Apple Silicon. All times in seconds.
|
|
61
|
+
|
|
62
|
+
**Summary:**
|
|
63
|
+
|
|
64
|
+
| Comparison | Range | Comments | P90 |
|
|
65
|
+
|----------------------|---------------------|----------------------|--------|
|
|
66
|
+
| vs SmarterCSV 1.14.4 | 2.6x - 3.5x faster | up to 20.5x for some | ~5x |
|
|
67
|
+
| vs CSV hashes | 1.9x - 3.8x faster | up to 6.7x for some | ~3x |
|
|
68
|
+
| vs CSV.table | 4.3x - 10.1x faster | up to 12.0x for some | ~7..8x |
|
|
69
|
+
|
|
70
|
+
_P90 measured over the full set of benchmarked files_
|
|
71
|
+
|
|
72
|
+
**These gains come while returning fully usable hashes with conversions, not raw arrays that require post-processing.**
|
|
73
|
+
|
|
74
|
+
**Memory improvements:** 39% less memory allocated, 43% fewer objects created
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
**vs SmarterCSV 1.14.4:**
|
|
78
|
+
|
|
79
|
+
| File | Size | Rows | 1.14.4 | 1.15.0 | Speedup |
|
|
80
|
+
|---------------------------|--------|------|--------|--------|------------|
|
|
81
|
+
| worldcities.csv | 5 MB | 48K | 1.27s | 0.49s | **2.6x** |
|
|
82
|
+
| LANDSAT_ETM_C2_L1_50k.csv | 31 MB | 50K | 6.73s | 1.99s | **3.4x** |
|
|
83
|
+
| PILOT_CERT.csv | 62 MB | 50K | 8.43s | 2.43s | **3.5x** |
|
|
84
|
+
| wide_500_cols_20k.csv | 98 MB | 20K | 19.38s | 5.09s | **3.8x** |
|
|
85
|
+
| long_fields_20k.csv | 22 MB | 20K | 3.05s | 0.15s | **20.5x** |
|
|
86
|
+
| embedded_newlines_20k.csv | 1.5 MB | 20K | 0.59s | 0.12s | **5.1x** |
|
|
87
|
+
|
|
88
|
+
**vs Ruby CSV 3.3.5:**
|
|
89
|
+
|
|
90
|
+
For an apples-to-apples comparison, we must compare parsers that return the same result structure and perform comparable work.
|
|
91
|
+
SmarterCSV returns an array of hashes with symbol keys and type conversion applied, so raw CSV array parsing is not a fair comparison.
|
|
92
|
+
|
|
93
|
+
**Beware of comparisons that focus solely on raw CSV parsing.**
|
|
94
|
+
Such benchmarks measure only tokenization, while real-world usage still **requires substantial post-processing to produce usable data**. Leaving this work out -- hash construction, normalization, type conversion, and edge-case handling -- consistently **understates the actual cost of CSV ingestion**.
|
|
95
|
+
|
|
96
|
+
For this reason, **CSV.table is the closest equivalent to SmarterCSV.**
|
|
97
|
+
|
|
98
|
+
| File | Size | Rows | CSV hashes | CSV.table | 1.15.0 | vs hashes | vs table |
|
|
99
|
+
|---------------------------|--------|------|------------|-----------|--------|-----------|------------|
|
|
100
|
+
| worldcities.csv | 5 MB | 48K | 1.06s | 2.12s | 0.49s | **2.2x** | **4.3x** |
|
|
101
|
+
| LANDSAT_ETM_C2_L1_50k.csv | 31 MB | 50K | 3.85s | 9.25s | 1.99s | **1.9x** | **4.7x** |
|
|
102
|
+
| PILOT_CERT.csv | 62 MB | 50K | 9.10s | 24.39s | 2.43s | **3.8x** | **10.1x** |
|
|
103
|
+
| wide_500_cols_20k.csv | 98 MB | 20K | 34.24s | 61.24s | 5.09s | **6.7x** | **12.0x** |
|
|
104
|
+
| long_fields_20k.csv | 22 MB | 20K | 0.34s | 0.81s | 0.15s | **2.3x** | **5.5x** |
|
|
105
|
+
| whitespace_heavy_20k.csv | 3.3 MB | 20K | 0.30s | 0.83s | 0.12s | **2.5x** | **7.0x** |
|
|
106
|
+
|
|
107
|
+
_CSV hashes = `CSV.read(file, headers: true).map(&:to_h)` (string keys, no conversion, still requires post-processing)_
|
|
108
|
+
_CSV.table = `CSV.table(file).map(&:to_h)` (symbol keys + numeric conversion, still requires post-processing)_
|
|
109
|
+
_worldcities.csv is [from here](https://simplemaps.com/data/world-cities)_
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
### Misc Fixes
|
|
113
|
+
|
|
114
|
+
* Fix compilation error on ARM macOS (`-march=native` unsupported) ([PR #313](https://github.com/tilo/smarter_csv/pull/313), thanks to Skye Shaw)
|
|
115
|
+
* CI improvements: Ruby 3.4 support, Codecov action update ([PR #311](https://github.com/tilo/smarter_csv/pull/311), thanks to Mark Bumiller)
|
|
116
|
+
|
|
4
117
|
## 1.14.4 (2025-05-26)
|
|
5
118
|
* Bugfix: SmarterCSV::Reader fixing issue with header containing spaces ([PR 305](https://github.com/tilo/smarter_csv/pull/305) thanks to Felipe Cabezudo)
|
|
6
119
|
|
data/CONTRIBUTORS.md
CHANGED
|
@@ -60,3 +60,5 @@ A Big Thank you to everyone who filed issues, sent comments, and who contributed
|
|
|
60
60
|
* [Matthew Kennedy](https://github.com/MattKitmanLabs)
|
|
61
61
|
* [Robert Reiz](https://github.com/reiz)
|
|
62
62
|
* [Felipe Cabezudo](https://github.com/felipekb)
|
|
63
|
+
* [Skye Shaw](https://github.com/sshaw)
|
|
64
|
+
* [Mark Bumiller](https://github.com/makrsmark)
|
data/README.md
CHANGED
|
@@ -17,6 +17,22 @@ One user wrote:
|
|
|
17
17
|
|
|
18
18
|
> *Best gem for CSV for us yet. [...] taking an import process from 7+ hours to about 3 minutes. [...] Smarter CSV was a big part and helped clean up our code ALOT*
|
|
19
19
|
|
|
20
|
+
## Performance
|
|
21
|
+
|
|
22
|
+
SmarterCSV is designed for **real-world CSV processing**, returning fully usable hashes with symbol keys and type conversions — not raw arrays that require additional post-processing.
|
|
23
|
+
|
|
24
|
+
**Beware of benchmarks that only measure raw CSV parsing.** Such comparisons measure tokenization alone, while real-world usage requires hash construction, key normalization, type conversion, and edge-case handling. Omitting this work **understates the actual cost of CSV ingestion**.
|
|
25
|
+
|
|
26
|
+
For a fair comparison, `CSV.table` is the closest Ruby CSV equivalent to SmarterCSV.
|
|
27
|
+
|
|
28
|
+
| Comparison | Speedup (P90) |
|
|
29
|
+
|----------------------|------------------|
|
|
30
|
+
| vs SmarterCSV 1.14.4 | ~5× faster |
|
|
31
|
+
| vs CSV.table | ~7× faster |
|
|
32
|
+
| vs CSV hashes | ~3× faster |
|
|
33
|
+
|
|
34
|
+
_Benchmarks: Ruby 3.4.7, M1 Apple Silicon. Memory: 39% less allocated, 43% fewer objects. See [CHANGELOG](./CHANGELOG.md) for details._
|
|
35
|
+
|
|
20
36
|
# Installation
|
|
21
37
|
|
|
22
38
|
Add this line to your application's Gemfile:
|
data/docs/basic_read_api.md
CHANGED
|
@@ -39,11 +39,12 @@ It can also be used with a block:
|
|
|
39
39
|
end
|
|
40
40
|
```
|
|
41
41
|
|
|
42
|
-
It can also be used for processing batches of rows:
|
|
42
|
+
It can also be used for processing batches of rows. An optional second block parameter provides the 0-based chunk index:
|
|
43
43
|
|
|
44
44
|
```
|
|
45
|
-
SmarterCSV.process(file_or_input, {chunk_size: 100}
|
|
45
|
+
SmarterCSV.process(file_or_input, {chunk_size: 100}) do |array_of_hashes, chunk_index|
|
|
46
46
|
# process one chunk of up to 100 rows of CSV data
|
|
47
|
+
puts "Processing chunk #{chunk_index}..."
|
|
47
48
|
end
|
|
48
49
|
```
|
|
49
50
|
|
data/docs/batch_processing.md
CHANGED
|
@@ -21,6 +21,14 @@ This can come in handy when you don't want to slow-down the CSV import of large
|
|
|
21
21
|
|
|
22
22
|
Setting the option `chunk_size` sets the max batch size.
|
|
23
23
|
|
|
24
|
+
When using a block, an optional second parameter `chunk_index` is passed, representing the 0-based index of the current chunk. This is useful for progress tracking and debugging:
|
|
25
|
+
|
|
26
|
+
```ruby
|
|
27
|
+
SmarterCSV.process(filename, {chunk_size: 100}) do |chunk, chunk_index|
|
|
28
|
+
puts "Processing chunk #{chunk_index}"
|
|
29
|
+
MyModel.insert_all(chunk)
|
|
30
|
+
end
|
|
31
|
+
```
|
|
24
32
|
|
|
25
33
|
## Example 1: How SmarterCSV processes CSV-files as chunks, returning arrays of hashes:
|
|
26
34
|
Please note how the returned array contains two sub-arrays containing the chunks which were read, each chunk containing 2 hashes.
|
|
@@ -34,11 +42,13 @@ In case the number of rows is not cleanly divisible by `:chunk_size`, the last c
|
|
|
34
42
|
```
|
|
35
43
|
|
|
36
44
|
## Example 2: How SmarterCSV processes CSV-files as chunks, and passes arrays of hashes to a given block:
|
|
37
|
-
Please note how the given block is passed the data for each chunk as the parameter (array of hashes),
|
|
38
|
-
|
|
45
|
+
Please note how the given block is passed the data for each chunk as the first parameter (array of hashes),
|
|
46
|
+
with an optional second parameter for the chunk index (0-based).
|
|
47
|
+
The `process` method returns the number of chunks when called with a block.
|
|
39
48
|
|
|
40
49
|
```ruby
|
|
41
|
-
> total_chunks = SmarterCSV.process('/tmp/pets.csv', {:chunk_size => 2, :key_mapping => {:first_name => :first, :last_name => :last}}) do |chunk|
|
|
50
|
+
> total_chunks = SmarterCSV.process('/tmp/pets.csv', {:chunk_size => 2, :key_mapping => {:first_name => :first, :last_name => :last}}) do |chunk, chunk_index|
|
|
51
|
+
puts "Processing chunk #{chunk_index}..."
|
|
42
52
|
chunk.each do |h| # you can post-process the data from each row to your heart's content, and also create virtual attributes:
|
|
43
53
|
h[:full_name] = [h[:first],h[:last]].join(' ') # create a virtual attribute
|
|
44
54
|
h.delete(:first) ; h.delete(:last) # remove two keys
|
|
@@ -46,7 +56,9 @@ and how the `process` method returns the number of chunks when called with a blo
|
|
|
46
56
|
puts chunk.inspect # we could at this point pass the chunk to a Resque worker..
|
|
47
57
|
end
|
|
48
58
|
|
|
59
|
+
Processing chunk 0...
|
|
49
60
|
[{:dogs=>"2", :full_name=>"Dan McAllister"}, {:cats=>"5", :full_name=>"Lucy Laweless"}]
|
|
61
|
+
Processing chunk 1...
|
|
50
62
|
[{:fish=>"21", :full_name=>"Miles O'Brian"}, {:dogs=>"2", :birds=>"1", :full_name=>"Nancy Homes"}]
|
|
51
63
|
=> 2
|
|
52
64
|
```
|
data/docs/examples.md
CHANGED
|
@@ -61,16 +61,18 @@ Please note how each hash contains only the keys for columns with non-null value
|
|
|
61
61
|
```
|
|
62
62
|
|
|
63
63
|
## Example 4: Processing a CSV File, and inserting batch jobs in Sidekiq:
|
|
64
|
+
The block receives an optional second parameter `chunk_index` (0-based) for progress tracking:
|
|
64
65
|
```ruby
|
|
65
66
|
filename = '/tmp/input.csv' # CSV file containing ids or data to process
|
|
66
67
|
options = { :chunk_size => 100 }
|
|
67
|
-
n = SmarterCSV.process(filename, options) do |chunk|
|
|
68
|
+
n = SmarterCSV.process(filename, options) do |chunk, chunk_index|
|
|
69
|
+
puts "Queueing chunk #{chunk_index} with #{chunk.size} records..."
|
|
68
70
|
Sidekiq::Client.push_bulk(
|
|
69
71
|
'class' => SidekiqIndividualWorkerClass,
|
|
70
72
|
'args' => chunk,
|
|
71
73
|
)
|
|
72
74
|
# OR:
|
|
73
|
-
# SidekiqBatchWorkerClass.process_async(chunk
|
|
75
|
+
# SidekiqBatchWorkerClass.process_async(chunk) # pass an array of hashes to Sidekiq workers for parallel processing
|
|
74
76
|
end
|
|
75
77
|
=> returns number of chunks
|
|
76
78
|
```
|
|
@@ -67,6 +67,17 @@ If you want to have an underscore between the header and the number, you can set
|
|
|
67
67
|
|
|
68
68
|
If you set `duplicate_header_suffix: nil`, you get the same behavior as earlier versions, which raised the `SmarterCSV::DuplicateHeaders` error.
|
|
69
69
|
|
|
70
|
+
When `SmarterCSV::DuplicateHeaders` is raised, you can access the duplicate headers directly via the `headers` accessor:
|
|
71
|
+
|
|
72
|
+
```ruby
|
|
73
|
+
begin
|
|
74
|
+
data = SmarterCSV.process('/tmp/dupe.csv', {duplicate_header_suffix: nil})
|
|
75
|
+
rescue SmarterCSV::DuplicateHeaders => e
|
|
76
|
+
puts "Duplicate columns: #{e.headers.join(', ')}"
|
|
77
|
+
# => e.headers returns [:name] (array of duplicate header symbols)
|
|
78
|
+
end
|
|
79
|
+
```
|
|
80
|
+
|
|
70
81
|
## Key Mapping
|
|
71
82
|
|
|
72
83
|
The above example already illustrates how intermediate keys can be mapped into something different.
|
data/docs/header_validations.md
CHANGED
|
@@ -33,5 +33,19 @@ If these keys are not present, `SmarterCSV::MissingKeys` will be raised to infor
|
|
|
33
33
|
=> this will raise SmarterCSV::MissingKeys if any row does not contain these three keys
|
|
34
34
|
```
|
|
35
35
|
|
|
36
|
+
## Handling Missing Keys Programmatically
|
|
37
|
+
|
|
38
|
+
When `SmarterCSV::MissingKeys` is raised, you can access the missing keys directly via the `keys` accessor, without parsing the error message:
|
|
39
|
+
|
|
40
|
+
```ruby
|
|
41
|
+
begin
|
|
42
|
+
options = { required_keys: [:source_account, :destination_account, :amount] }
|
|
43
|
+
data = SmarterCSV.process("/tmp/transactions.csv", options)
|
|
44
|
+
rescue SmarterCSV::MissingKeys => e
|
|
45
|
+
puts "Missing columns: #{e.keys.join(', ')}"
|
|
46
|
+
# => e.keys returns [:amount] (array of missing key symbols)
|
|
47
|
+
end
|
|
48
|
+
```
|
|
49
|
+
|
|
36
50
|
----------------
|
|
37
51
|
PREVIOUS: [Header Transformations](./header_transformations.md) | NEXT: [Data Transformations](./data_transformations.md)
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
|
|
2
|
+
SHELL = /bin/sh
|
|
3
|
+
|
|
4
|
+
# V=0 quiet, V=1 verbose. other values don't work.
|
|
5
|
+
V = 0
|
|
6
|
+
V0 = $(V:0=)
|
|
7
|
+
Q1 = $(V:1=)
|
|
8
|
+
Q = $(Q1:0=@)
|
|
9
|
+
ECHO1 = $(V:1=@ :)
|
|
10
|
+
ECHO = $(ECHO1:0=@ echo)
|
|
11
|
+
NULLCMD = :
|
|
12
|
+
|
|
13
|
+
#### Start of system configuration section. ####
|
|
14
|
+
|
|
15
|
+
srcdir = .
|
|
16
|
+
topdir = /Users/tilo/.rvm/rubies/ruby-3.4.7/include/ruby-3.4.0
|
|
17
|
+
hdrdir = $(topdir)
|
|
18
|
+
arch_hdrdir = /Users/tilo/.rvm/rubies/ruby-3.4.7/include/ruby-3.4.0/arm64-darwin25
|
|
19
|
+
PATH_SEPARATOR = :
|
|
20
|
+
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
|
|
21
|
+
prefix = $(DESTDIR)/Users/tilo/.rvm/rubies/ruby-3.4.7
|
|
22
|
+
rubysitearchprefix = $(rubylibprefix)/$(sitearch)
|
|
23
|
+
rubyarchprefix = $(rubylibprefix)/$(arch)
|
|
24
|
+
rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
|
|
25
|
+
exec_prefix = $(prefix)
|
|
26
|
+
vendorarchhdrdir = $(vendorhdrdir)/$(sitearch)
|
|
27
|
+
sitearchhdrdir = $(sitehdrdir)/$(sitearch)
|
|
28
|
+
rubyarchhdrdir = $(rubyhdrdir)/$(arch)
|
|
29
|
+
vendorhdrdir = $(rubyhdrdir)/vendor_ruby
|
|
30
|
+
sitehdrdir = $(rubyhdrdir)/site_ruby
|
|
31
|
+
rubyhdrdir = $(includedir)/$(RUBY_VERSION_NAME)
|
|
32
|
+
vendorarchdir = $(vendorlibdir)/$(sitearch)
|
|
33
|
+
vendorlibdir = $(vendordir)/$(ruby_version)
|
|
34
|
+
vendordir = $(rubylibprefix)/vendor_ruby
|
|
35
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
|
36
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
|
37
|
+
sitedir = $(rubylibprefix)/site_ruby
|
|
38
|
+
rubyarchdir = $(rubylibdir)/$(arch)
|
|
39
|
+
rubylibdir = $(rubylibprefix)/$(ruby_version)
|
|
40
|
+
sitearchincludedir = $(includedir)/$(sitearch)
|
|
41
|
+
archincludedir = $(includedir)/$(arch)
|
|
42
|
+
sitearchlibdir = $(libdir)/$(sitearch)
|
|
43
|
+
archlibdir = $(libdir)/$(arch)
|
|
44
|
+
ridir = $(datarootdir)/$(RI_BASE_NAME)
|
|
45
|
+
modular_gc_dir = $(DESTDIR)
|
|
46
|
+
mandir = $(datarootdir)/man
|
|
47
|
+
localedir = $(datarootdir)/locale
|
|
48
|
+
libdir = $(exec_prefix)/lib
|
|
49
|
+
psdir = $(docdir)
|
|
50
|
+
pdfdir = $(docdir)
|
|
51
|
+
dvidir = $(docdir)
|
|
52
|
+
htmldir = $(docdir)
|
|
53
|
+
infodir = $(datarootdir)/info
|
|
54
|
+
docdir = $(datarootdir)/doc/$(PACKAGE)
|
|
55
|
+
oldincludedir = $(DESTDIR)/usr/include
|
|
56
|
+
includedir = $(SDKROOT)$(prefix)/include
|
|
57
|
+
runstatedir = $(localstatedir)/run
|
|
58
|
+
localstatedir = $(prefix)/var
|
|
59
|
+
sharedstatedir = $(prefix)/com
|
|
60
|
+
sysconfdir = $(prefix)/etc
|
|
61
|
+
datadir = $(datarootdir)
|
|
62
|
+
datarootdir = $(prefix)/share
|
|
63
|
+
libexecdir = $(exec_prefix)/libexec
|
|
64
|
+
sbindir = $(exec_prefix)/sbin
|
|
65
|
+
bindir = $(exec_prefix)/bin
|
|
66
|
+
archdir = $(rubyarchdir)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
CC_WRAPPER =
|
|
70
|
+
CC = gcc
|
|
71
|
+
CXX = g++
|
|
72
|
+
LIBRUBY = $(LIBRUBY_SO)
|
|
73
|
+
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
|
74
|
+
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
|
|
75
|
+
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static -framework CoreFoundation $(MAINLIBS)
|
|
76
|
+
empty =
|
|
77
|
+
OUTFLAG = -o $(empty)
|
|
78
|
+
COUTFLAG = -o $(empty)
|
|
79
|
+
CSRCFLAG = $(empty)
|
|
80
|
+
|
|
81
|
+
RUBY_EXTCONF_H =
|
|
82
|
+
cflags = $(hardenflags) -fdeclspec $(optflags) $(debugflags) $(warnflags)
|
|
83
|
+
cxxflags =
|
|
84
|
+
optflags = -O3 -march=native -flto -fomit-frame-pointer -DNDEBUG
|
|
85
|
+
debugflags =
|
|
86
|
+
warnflags = -Wall -Wextra -Wextra-tokens -Wdeprecated-declarations -Wdivision-by-zero -Wdiv-by-zero -Wimplicit-function-declaration -Wimplicit-int -Wpointer-arith -Wshorten-64-to-32 -Wwrite-strings -Wold-style-definition -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wunused-variable -Wmisleading-indentation -Wundef
|
|
87
|
+
cppflags =
|
|
88
|
+
CCDLFLAGS = -fno-common
|
|
89
|
+
CFLAGS = $(CCDLFLAGS) -O3 -I/opt/homebrew/opt/libyaml/include -I/opt/homebrew/opt/libksba/include -I/opt/homebrew/opt/readline/include -I/opt/homebrew/opt/zlib/include -I/opt/homebrew/opt/openssl@1.1/include $(cflags) -fno-common -pipe $(ARCH_FLAG)
|
|
90
|
+
INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
|
|
91
|
+
DEFS =
|
|
92
|
+
CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -D_DARWIN_UNLIMITED_SELECT -D_REENTRANT $(DEFS) $(cppflags)
|
|
93
|
+
CXXFLAGS = $(CCDLFLAGS) -fdeclspec $(ARCH_FLAG)
|
|
94
|
+
ldflags = -L. -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -fstack-protector-strong
|
|
95
|
+
dldflags = -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -Wl,-undefined,dynamic_lookup
|
|
96
|
+
ARCH_FLAG = -arch arm64
|
|
97
|
+
DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
|
|
98
|
+
LDSHARED = $(CC) -dynamic -bundle
|
|
99
|
+
LDSHAREDXX = $(CXX) -dynamic -bundle
|
|
100
|
+
POSTLINK = dsymutil $@ 2>/dev/null; { test -z '$(RUBY_CODESIGN)' || codesign -s '$(RUBY_CODESIGN)' $@; }
|
|
101
|
+
AR = ar
|
|
102
|
+
LD = ld
|
|
103
|
+
EXEEXT =
|
|
104
|
+
|
|
105
|
+
RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
|
|
106
|
+
RUBY_SO_NAME = ruby.3.4
|
|
107
|
+
RUBYW_INSTALL_NAME =
|
|
108
|
+
RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
|
|
109
|
+
RUBYW_BASE_NAME = rubyw
|
|
110
|
+
RUBY_BASE_NAME = ruby
|
|
111
|
+
|
|
112
|
+
arch = arm64-darwin25
|
|
113
|
+
sitearch = $(arch)
|
|
114
|
+
ruby_version = 3.4.0
|
|
115
|
+
ruby = $(bindir)/$(RUBY_BASE_NAME)
|
|
116
|
+
RUBY = $(ruby)
|
|
117
|
+
BUILTRUBY = $(bindir)/$(RUBY_BASE_NAME)
|
|
118
|
+
ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/backward.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
|
|
119
|
+
|
|
120
|
+
RM = rm -f
|
|
121
|
+
RM_RF = rm -fr
|
|
122
|
+
RMDIRS = rmdir -p
|
|
123
|
+
MAKEDIRS = /opt/homebrew/opt/coreutils/bin/gmkdir -p
|
|
124
|
+
INSTALL = /opt/homebrew/opt/coreutils/bin/ginstall -c
|
|
125
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
|
126
|
+
INSTALL_DATA = $(INSTALL) -m 644
|
|
127
|
+
COPY = cp
|
|
128
|
+
TOUCH = exit >
|
|
129
|
+
|
|
130
|
+
#### End of system configuration section. ####
|
|
131
|
+
|
|
132
|
+
preload =
|
|
133
|
+
libpath = . $(libdir)
|
|
134
|
+
LIBPATH = -L. -L$(libdir)
|
|
135
|
+
DEFFILE =
|
|
136
|
+
|
|
137
|
+
CLEANFILES = mkmf.log
|
|
138
|
+
DISTCLEANFILES =
|
|
139
|
+
DISTCLEANDIRS =
|
|
140
|
+
|
|
141
|
+
extout =
|
|
142
|
+
extout_prefix =
|
|
143
|
+
target_prefix = /smarter_csv
|
|
144
|
+
LOCAL_LIBS =
|
|
145
|
+
LIBS = $(LIBRUBYARG_SHARED) -lpthread
|
|
146
|
+
ORIG_SRCS = smarter_csv.c
|
|
147
|
+
SRCS = $(ORIG_SRCS)
|
|
148
|
+
OBJS = smarter_csv.o
|
|
149
|
+
HDRS =
|
|
150
|
+
LOCAL_HDRS =
|
|
151
|
+
TARGET = smarter_csv
|
|
152
|
+
TARGET_NAME = smarter_csv
|
|
153
|
+
TARGET_ENTRY = Init_$(TARGET_NAME)
|
|
154
|
+
DLLIB = $(TARGET).bundle
|
|
155
|
+
EXTSTATIC =
|
|
156
|
+
STATIC_LIB =
|
|
157
|
+
|
|
158
|
+
TIMESTAMP_DIR = .
|
|
159
|
+
BINDIR = $(bindir)
|
|
160
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
|
161
|
+
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
|
162
|
+
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
|
163
|
+
HDRDIR = $(sitehdrdir)$(target_prefix)
|
|
164
|
+
ARCHHDRDIR = $(sitearchhdrdir)$(target_prefix)
|
|
165
|
+
TARGET_SO_DIR =
|
|
166
|
+
TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
|
|
167
|
+
CLEANLIBS = $(TARGET_SO) $(TARGET_SO:=.dSYM)
|
|
168
|
+
CLEANOBJS = $(OBJS) *.bak
|
|
169
|
+
TARGET_SO_DIR_TIMESTAMP = $(TIMESTAMP_DIR)/.sitearchdir.-.smarter_csv.time
|
|
170
|
+
|
|
171
|
+
all: $(DLLIB)
|
|
172
|
+
static: $(STATIC_LIB)
|
|
173
|
+
.PHONY: all install static install-so install-rb
|
|
174
|
+
.PHONY: clean clean-so clean-static clean-rb
|
|
175
|
+
|
|
176
|
+
clean-static::
|
|
177
|
+
clean-rb-default::
|
|
178
|
+
clean-rb::
|
|
179
|
+
clean-so::
|
|
180
|
+
clean: clean-so clean-static clean-rb-default clean-rb
|
|
181
|
+
-$(Q)$(RM_RF) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
|
|
182
|
+
|
|
183
|
+
distclean-rb-default::
|
|
184
|
+
distclean-rb::
|
|
185
|
+
distclean-so::
|
|
186
|
+
distclean-static::
|
|
187
|
+
distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
|
|
188
|
+
-$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
|
189
|
+
-$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
|
190
|
+
-$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
|
|
191
|
+
|
|
192
|
+
realclean: distclean
|
|
193
|
+
install: install-so install-rb
|
|
194
|
+
|
|
195
|
+
install-so: $(DLLIB) $(TARGET_SO_DIR_TIMESTAMP)
|
|
196
|
+
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
|
197
|
+
clean-static::
|
|
198
|
+
-$(Q)$(RM) $(STATIC_LIB)
|
|
199
|
+
install-rb: pre-install-rb do-install-rb install-rb-default
|
|
200
|
+
install-rb-default: pre-install-rb-default do-install-rb-default
|
|
201
|
+
pre-install-rb: Makefile
|
|
202
|
+
pre-install-rb-default: Makefile
|
|
203
|
+
do-install-rb:
|
|
204
|
+
do-install-rb-default:
|
|
205
|
+
pre-install-rb-default:
|
|
206
|
+
@$(NULLCMD)
|
|
207
|
+
$(TARGET_SO_DIR_TIMESTAMP):
|
|
208
|
+
$(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
|
|
209
|
+
$(Q) $(TOUCH) $@
|
|
210
|
+
|
|
211
|
+
site-install: site-install-so site-install-rb
|
|
212
|
+
site-install-so: install-so
|
|
213
|
+
site-install-rb: install-rb
|
|
214
|
+
|
|
215
|
+
.SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
|
|
216
|
+
|
|
217
|
+
.cc.o:
|
|
218
|
+
$(ECHO) compiling $(<)
|
|
219
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
|
220
|
+
|
|
221
|
+
.cc.S:
|
|
222
|
+
$(ECHO) translating $(<)
|
|
223
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
|
224
|
+
|
|
225
|
+
.mm.o:
|
|
226
|
+
$(ECHO) compiling $(<)
|
|
227
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
|
228
|
+
|
|
229
|
+
.mm.S:
|
|
230
|
+
$(ECHO) translating $(<)
|
|
231
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
|
232
|
+
|
|
233
|
+
.cxx.o:
|
|
234
|
+
$(ECHO) compiling $(<)
|
|
235
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
|
236
|
+
|
|
237
|
+
.cxx.S:
|
|
238
|
+
$(ECHO) translating $(<)
|
|
239
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
|
240
|
+
|
|
241
|
+
.cpp.o:
|
|
242
|
+
$(ECHO) compiling $(<)
|
|
243
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
|
244
|
+
|
|
245
|
+
.cpp.S:
|
|
246
|
+
$(ECHO) translating $(<)
|
|
247
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
|
248
|
+
|
|
249
|
+
.c.o:
|
|
250
|
+
$(ECHO) compiling $(<)
|
|
251
|
+
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
|
252
|
+
|
|
253
|
+
.c.S:
|
|
254
|
+
$(ECHO) translating $(<)
|
|
255
|
+
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
|
256
|
+
|
|
257
|
+
.m.o:
|
|
258
|
+
$(ECHO) compiling $(<)
|
|
259
|
+
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
|
260
|
+
|
|
261
|
+
.m.S:
|
|
262
|
+
$(ECHO) translating $(<)
|
|
263
|
+
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
|
264
|
+
|
|
265
|
+
$(TARGET_SO): $(OBJS) Makefile
|
|
266
|
+
$(ECHO) linking shared-object smarter_csv/$(DLLIB)
|
|
267
|
+
-$(Q)$(RM) $(@)
|
|
268
|
+
$(Q) $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
|
269
|
+
$(Q) $(POSTLINK)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
$(OBJS): $(HDRS) $(ruby_headers)
|
data/ext/smarter_csv/extconf.rb
CHANGED
|
@@ -9,8 +9,10 @@ if RbConfig::MAKEFILE_CONFIG["CFLAGS"].include?("-g -O3")
|
|
|
9
9
|
RbConfig::MAKEFILE_CONFIG["CFLAGS"] = fixed_CFLAGS
|
|
10
10
|
end
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
12
|
+
optflags = "-O3 -flto -fomit-frame-pointer -DNDEBUG".dup
|
|
13
|
+
optflags << " -march=native" unless RUBY_PLATFORM.start_with?("arm64-darwin")
|
|
14
|
+
|
|
15
|
+
CONFIG["optflags"] = optflags
|
|
14
16
|
CONFIG["debugflags"] = ""
|
|
15
17
|
|
|
16
18
|
create_makefile('smarter_csv/smarter_csv')
|
|
Binary file
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
3
|
+
<plist version="1.0">
|
|
4
|
+
<dict>
|
|
5
|
+
<key>CFBundleDevelopmentRegion</key>
|
|
6
|
+
<string>English</string>
|
|
7
|
+
<key>CFBundleIdentifier</key>
|
|
8
|
+
<string>com.apple.xcode.dsym.smarter_csv.bundle</string>
|
|
9
|
+
<key>CFBundleInfoDictionaryVersion</key>
|
|
10
|
+
<string>6.0</string>
|
|
11
|
+
<key>CFBundlePackageType</key>
|
|
12
|
+
<string>dSYM</string>
|
|
13
|
+
<key>CFBundleSignature</key>
|
|
14
|
+
<string>????</string>
|
|
15
|
+
<key>CFBundleShortVersionString</key>
|
|
16
|
+
<string>1.0</string>
|
|
17
|
+
<key>CFBundleVersion</key>
|
|
18
|
+
<string>1</string>
|
|
19
|
+
</dict>
|
|
20
|
+
</plist>
|
|
Binary file
|