parshap-csv_parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/README.md +103 -0
- data/csv_parser.rb +97 -0
- data/example.rb +74 -0
- data/parshap-csv_parser.gemspec +15 -0
- data/test.rb +95 -0
- metadata +65 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: bf9451df46924fe41eda4419f25e0671935c25f7
|
|
4
|
+
data.tar.gz: 4f6f2c86e00d2d196cab35b82ee51008d9961d5a
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 0dc0ca17212b387126bdfce745bbea9180c7034bfc04017458da645b0ee277dbe5100ef79f0e108a983d29d6ee66150495b9c97fbd521fb897e0af2b605c8b59
|
|
7
|
+
data.tar.gz: 83006cb3d94a7ef47d98645fba7a346ed77d264bcc5d59edcce8e400f49505c59eac84ce6da5c5c9d41d71cea45771d6296f8008a21ccb0151a7829be0c7ecd9
|
data/Gemfile
ADDED
data/README.md
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# CSVParser
|
|
2
|
+
|
|
3
|
+
Parse CSV rows by defining parsing blocks for individual columns.
|
|
4
|
+
|
|
5
|
+
Each row is parsed one-by-one. First a new *Hash* is initialized to
|
|
6
|
+
store data for the row. Then, each **individual column** is parsed by
|
|
7
|
+
calling matching parsing blocks. Parsing blocks are passed the column's
|
|
8
|
+
value and header key and can set arbitrary state on the *Hash* for the
|
|
9
|
+
current row.
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
$ gem install parshap-csv_parser
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Usage
|
|
18
|
+
|
|
19
|
+
Given a `data.csv` file:
|
|
20
|
+
|
|
21
|
+
```csv
|
|
22
|
+
Name,Phone
|
|
23
|
+
john doe,555-481-2345
|
|
24
|
+
jane c doe,555-123-4567
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
You can parse it like so:
|
|
28
|
+
|
|
29
|
+
```rb
|
|
30
|
+
require "csv"
require "csv_parser"
|
|
31
|
+
|
|
32
|
+
class MyParser < CSVParser
|
|
33
|
+
parse "Name" do |val|
|
|
34
|
+
self[:first_name], self[:last_name] = val.split(nil, 2)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
parse /Phone( Number)?/ do |val|
|
|
38
|
+
self[:phone] = val
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
MyParser.new(CSV.open "data.csv").each do |row|
|
|
43
|
+
puts "#{row[:first_name]}, #{row[:last_name]}: #{row[:phone]}"
|
|
44
|
+
end
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
See [`example.rb`](example.rb) and [`test.rb`](test.rb) for more examples.
|
|
48
|
+
|
|
49
|
+
### Defining Parsers
|
|
50
|
+
|
|
51
|
+
Parsing blocks are added using the `CSVParser.parse` class method. The
|
|
52
|
+
first parameter, *criteria*, determines if the block should be
|
|
53
|
+
executed for a particular column (by using the `===` operator with the
|
|
54
|
+
column's header value). The block is passed the column value and its
|
|
55
|
+
associated header value. The block can update the values for the current
|
|
56
|
+
row by using `self` as a *Hash*.
|
|
57
|
+
|
|
58
|
+
Column and header values are always converted to strings and `strip`ped
|
|
59
|
+
of whitespace first.
|
|
60
|
+
|
|
61
|
+
```rb
|
|
62
|
+
class MyParser < CSVParser
|
|
63
|
+
parse /^(first|last)?\W*name$/i do |val|
|
|
64
|
+
self[:name] = val.capitalize
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
#### Once Parsers
|
|
70
|
+
|
|
71
|
+
Using `CSVParser.parse_once`, you can define parsers that will only be
|
|
72
|
+
called once per row, for the first matching column. In the above
|
|
73
|
+
example, if `parse_once` was used, the block would only be called once
|
|
74
|
+
even with the occurrence of multiple *name* columns.
|
|
75
|
+
|
|
76
|
+
### Default Row Values
|
|
77
|
+
|
|
78
|
+
The `CSVParser#defaults` method is used to generate a hash to use for
|
|
79
|
+
each row. You can use this to set default values.
|
|
80
|
+
|
|
81
|
+
```rb
|
|
82
|
+
class MyParser < CSVParser
|
|
83
|
+
def defaults
|
|
84
|
+
{ name: "User", emails: [] }
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### CSV Instance
|
|
90
|
+
|
|
91
|
+
The `CSVParser` constructor takes an instance of the [Ruby Standard
|
|
92
|
+
Library `CSV`
|
|
93
|
+
class](http://ruby-doc.org/stdlib-2.1.0/libdoc/csv/rdoc/CSV.html). This
|
|
94
|
+
object can be created in any way, but **the [`:headers`
|
|
95
|
+
option](http://ruby-doc.org/stdlib-2.1.0/libdoc/csv/rdoc/CSV.html#method-c-new)
|
|
96
|
+
must be `false`**.
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
## Tests
|
|
100
|
+
|
|
101
|
+
```
|
|
102
|
+
$ ruby test.rb
|
|
103
|
+
```
|
data/csv_parser.rb
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Row-oriented CSV parser driven by per-column parsing blocks.
#
# Subclasses register blocks with the class-level +parse+ and +parse_once+
# macros. For every data row a fresh attributes hash is created (see
# #defaults); each column's stripped value and header are then offered to the
# registered blocks whose criteria match the header (via +===+). Blocks run
# with +self+ set to the parser instance and build the row through
# +self[...]+ / +self[...] = ...+.
class CSVParser
  include Enumerable

  class << self
    # Parser definitions registered on this class, in definition order.
    #
    # The list lives in a class-level instance variable, so every class keeps
    # its own list.
    #
    # @return [Array<Hash>] hashes holding :criteria, :block and extra params
    def parsers
      @parsers ||= []
    end

    private

    # Add a column parser.
    #
    # @param criteria [#===] matched against each column's header
    # @param params [Hash] extra options merged into the definition
    #   (+:once+ is the only option read by this class)
    # @yieldparam val [String] stripped column value
    # @yieldparam key [String] stripped column header
    def parse(criteria, params = {}, &block)
      parsers << { criteria: criteria, block: block }.merge(params)
    end

    # Add a parser that will only get called once per row, for the first
    # matching column.
    def parse_once(criteria, params = {}, &block)
      parse criteria, { once: true }.merge(params), &block
    end
  end

  # @param csv [#shift, #each] row source; +shift+ must return the header row
  #   and +each+ must yield the remaining rows as arrays of column values
  def initialize(csv)
    @csv = csv
  end

  # Consume the header row, then yield one attributes hash per data row.
  #
  # @yieldparam attributes [Hash] the parsed row
  # @return [Enumerator] when called without a block
  def each
    return enum_for(:each) unless block_given?

    # Get header values used later in parsing. An empty or exhausted source
    # returns nil from #shift; treat that as "no columns" instead of crashing.
    @headers = (@csv.shift || []).map { |header| header.to_s.strip }

    # Parse each row
    @csv.each do |row|
      yield parse_row(row)
    end
  end

  private

  # Parser definitions of this instance's class.
  def parsers
    self.class.parsers
  end

  # Build the attributes hash for a single row.
  def parse_row(row)
    # Create a new attributes hash for this row, this will be our result
    @attributes = defaults
    # Keep track of which parsers have already been executed for this row
    @executed = []

    # Parse each column of the row; a column without a header gets "" as key
    row.each_with_index do |val, i|
      parse_val val.to_s.strip, @headers[i].to_s
    end

    # Return the attributes that were built using #[]=
    @attributes
  end

  # Parse a column value by running every parser that matches this column.
  def parse_val(val, key)
    parsers.each do |parser|
      next if onced?(parser)
      next unless match?(parser, val, key)

      # Run the block with the parser instance as self so it can use #[]=
      instance_exec val, key, &parser[:block]
      @executed << parser
    end
  end

  # Is the parser a once parser and has already been executed for this row?
  def onced?(parser)
    parser[:once] && @executed.include?(parser)
  end

  # Does the parser criteria match the column? Only the header (key) is
  # consulted; val is accepted but not used here.
  def match?(parser, val, key)
    parser[:criteria] === key
  end

  # Default hash values to use for each row. Called once per row, so
  # overrides should return a new hash on every call.
  def defaults
    {}
  end

  protected

  # Read a value from the row currently being built.
  def [](name)
    @attributes[name]
  end

  # Set a value on the row currently being built.
  def []=(name, val)
    @attributes[name] = val
  end
end
|
data/example.rb
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Example CSVParser subclass that maps columns of a contact export onto a
# nested attributes hash. CSVParser must already be loaded before this class
# is defined.
class ExampleParser < CSVParser
  # Split "First Rest Of Name" into first and last name on the first run of
  # whitespace.
  parse_once "Name" do |val|
    first_name, last_name = val.split(nil, 2)
    self[:first_name] = first_name
    self[:last_name] = last_name
  end

  parse_once "Search Timeframe" do |val|
    self[:timeframe] = val
  end

  # Normalise a list of addresses into a single comma-separated string.
  parse_once "Email (Personal) #1" do |val|
    self[:email] = split(val).join ","
  end

  parse_once "Contact Type" do |val|
    self[:contact_types] << val
  end

  # The parentheses are escaped so they match the literal header text
  # "Phone (Mobile) #N"; unescaped they form a capture group and the pattern
  # would only match "Phone Mobile #N".
  # NOTE(review): parse_once means only the first matching phone column of a
  # row is recorded - confirm that is intended.
  parse_once /^Phone \(Mobile\) #\d$/ do |val|
    self[:phone_numbers] << {
      label: "Cell",
      number: val,
    }
  end

  # Notes: every matching column contributes one note entry.
  [
    "Note",
    "Home Type",
    "Latest Communication",
    /^Listing #\d$/,
  ].each do |name|
    parse name do |val|
      self[:notes] << {
        content: val
      }
    end
  end

  # Property Search

  parse_once "Min. Price" do |val|
    self[:property_search][:price_low] = val
  end

  parse_once "Max. Price" do |val|
    self[:property_search][:price_high] = val
  end

  parse /^Location #\d$/ do |val|
    self[:property_search][:misc_locations] << {
      name: "Other #{val}",
      location_value: val,
    }
  end

  private

  # Break a delimited string into its non-empty, stripped parts.
  # The character class splits on any single whitespace character, a literal
  # "*", "," or ";".
  # NOTE(review): the "*" inside the class is a literal asterisk, not a
  # quantifier - confirm that is what was meant.
  def split(s)
    s.split(/[\s*,;]/).map(&:strip).reject(&:empty?)
  end

  # Fresh containers for every row so the parsing blocks above can append to
  # them without sharing state between rows.
  def defaults
    {
      notes: [],
      contact_types: [],
      phone_numbers: [],
      property_search: {
        misc_locations: [],
      },
    }
  end
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Gem specification for parshap-csv_parser.
Gem::Specification.new do |spec|
  spec.name          = "parshap-csv_parser"
  spec.version       = "1.0.0"
  spec.authors       = ["Parsha Pourkhomami"]
  spec.email         = ["parshap+gem@gmail.com"]
  spec.summary       = "High-level CSV parser"
  spec.homepage      = "https://github.com/parshap/csv-parser"
  spec.license       = "Public Domain"
  # Library files sit at the repository root rather than under lib/.
  spec.require_paths = ["."]

  # Package every file tracked by git. NUL-delimited output (-z) stops git
  # from quoting unusual path names and avoids relying on the mutable `$/`
  # global as the separator.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.test_files    = spec.files.grep(/^test/)

  spec.add_development_dependency "test-unit"
end
|
data/test.rb
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
require "test/unit"
|
|
2
|
+
require "csv"
|
|
3
|
+
require_relative "csv_parser"
|
|
4
|
+
|
|
5
|
+
# Test fixture that counts how often each of its three parser blocks runs.
class SimpleParser < CSVParser
  attr_reader :called, :once_called, :first_called

  def initialize(csv)
    @called = @once_called = @first_called = 0
    super
  end

  # Matches any String header: counts the call and copies the column
  # into the row under its header.
  parse(String) do |val, key|
    @called += 1
    self[key] = val
  end

  # Matches any String header too, but is registered as a once-per-row parser.
  parse_once(String) { @once_called += 1 }

  # Matches only the column whose header equals "first".
  parse("first") { @first_called += 1 }
end
|
|
30
|
+
|
|
31
|
+
# Test fixture with a single Regexp criteria: copies every column whose
# header matches the pattern into the row under that header.
class RegexpParser < CSVParser
  parse(/first(_name)?/) { |val, key| self[key] = val }
end
|
|
36
|
+
|
|
37
|
+
# Unit tests for CSVParser using the SimpleParser and RegexpParser fixtures.
class CSVParserTest < Test::Unit::TestCase
  # Fresh CSV source for each use: one header row followed by two data rows.
  def csv
    CSV.new "first,last\nparsha,pourkhomami\nhey,you\n"
  end

  # A parser without any definitions still yields one hash per data row.
  def test_basic
    seen = 0
    CSVParser.new(csv).each do |data|
      assert data
      seen += 1
    end
    assert_equal 2, seen
  end

  # Checks the per-class registries, the running call counters after every
  # row, and the rows that were built.
  def test_simple_parser
    assert_equal 0, CSVParser.parsers.length
    assert_equal 3, SimpleParser.parsers.length

    parser = SimpleParser.new csv
    count = 0
    rows = []

    parser.each_with_index do |data, index|
      count = index + 1
      assert data
      assert_equal 2 * count, parser.called
      assert_equal 1 * count, parser.once_called
      assert_equal 1 * count, parser.first_called
      rows << data
    end

    first_row = { "first" => "parsha", "last" => "pourkhomami" }
    second_row = { "first" => "hey", "last" => "you" }
    assert_equal(first_row, rows[0])
    assert_equal(second_row, rows[1])

    assert_equal 2, count
    assert_equal 2, rows.length
    assert_equal 4, parser.called
    assert_equal 2, parser.once_called
    assert_equal 2, parser.first_called
  end

  # Only the column whose header matches the pattern ends up in the rows.
  def test_regexp
    rows = RegexpParser.new(csv).to_a
    assert_equal 2, rows.length
    assert_equal({ "first" => "parsha" }, rows[0])
    assert_equal({ "first" => "hey" }, rows[1])
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: parshap-csv_parser
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Parsha Pourkhomami
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2013-12-31 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: test-unit
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - '>='
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - '>='
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0'
|
|
27
|
+
description:
|
|
28
|
+
email:
|
|
29
|
+
- parshap+gem@gmail.com
|
|
30
|
+
executables: []
|
|
31
|
+
extensions: []
|
|
32
|
+
extra_rdoc_files: []
|
|
33
|
+
files:
|
|
34
|
+
- Gemfile
|
|
35
|
+
- README.md
|
|
36
|
+
- csv_parser.rb
|
|
37
|
+
- example.rb
|
|
38
|
+
- parshap-csv_parser.gemspec
|
|
39
|
+
- test.rb
|
|
40
|
+
homepage: https://github.com/parshap/csv-parser
|
|
41
|
+
licenses:
|
|
42
|
+
- Public Domain
|
|
43
|
+
metadata: {}
|
|
44
|
+
post_install_message:
|
|
45
|
+
rdoc_options: []
|
|
46
|
+
require_paths:
|
|
47
|
+
- .
|
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
49
|
+
requirements:
|
|
50
|
+
- - '>='
|
|
51
|
+
- !ruby/object:Gem::Version
|
|
52
|
+
version: '0'
|
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
54
|
+
requirements:
|
|
55
|
+
- - '>='
|
|
56
|
+
- !ruby/object:Gem::Version
|
|
57
|
+
version: '0'
|
|
58
|
+
requirements: []
|
|
59
|
+
rubyforge_project:
|
|
60
|
+
rubygems_version: 2.0.3
|
|
61
|
+
signing_key:
|
|
62
|
+
specification_version: 4
|
|
63
|
+
summary: High-level CSV parser
|
|
64
|
+
test_files:
|
|
65
|
+
- test.rb
|