ingestor 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +0 -1
- data/README.md +131 -123
- data/ingestor.gemspec +2 -2
- data/lib/ingestor.rb +0 -2
- data/lib/ingestor/proxy.rb +1 -1
- data/lib/ingestor/version.rb +1 -1
- data/spec/orm/active_record.rb +0 -2
- metadata +49 -33
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ebbcd702acac5ab4cc68de903353a86a8b45986c
|
4
|
+
data.tar.gz: 6c988a13dacacbbfadb7e549e8533ecde2b2b84c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 26bfdebcf5450d0b27debd828ece5744a06d3020a95bf86dc8cf8db8e27f7e8b5e5b37ae28d67702d58247a2c0fed6c42dbe682b3a95eaf2b1585aa02a06ba1a
|
7
|
+
data.tar.gz: cd17329c4865444f1329fdbf213af79bfc5bca430db4061f3f24b06553ec8481882521bab30223cd7e4a6872e5e8b679e1c5964138639705ab5c32db81f8c9f9
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -3,6 +3,9 @@
|
|
3
3
|
A simple DSL for importing data from text and csv files to ActiveRecord. This was originally designed to
|
4
4
|
continually import changing data from EAN and Geonames.
|
5
5
|
|
6
|
+
Great for parsing JSON, XML, CSV and plain text into ActiveRecord. If you
|
7
|
+
need to scrape HTML into ActiveRecord check out [klepto](http://github.com/coryodaniel/klepto).
|
8
|
+
|
6
9
|
## Installation
|
7
10
|
|
8
11
|
Add this line to your application's Gemfile:
|
@@ -30,149 +33,155 @@ Add the following to your Rakefile
|
|
30
33
|
3|United States|315,550,000
|
31
34
|
|
32
35
|
And an AR Class:
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
36
|
+
```ruby
|
37
|
+
class Country
|
38
|
+
attr_accessible :name, :population
|
39
|
+
end
|
40
|
+
```
|
37
41
|
|
38
42
|
Sync the file with AR:
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
}
|
47
|
-
end
|
48
|
-
|
49
|
-
# current lines values
|
50
|
-
finder{|attrs|
|
51
|
-
Country.where(id: attrs[:id]).first || Country.new
|
43
|
+
```ruby
|
44
|
+
ingest("path/to/countries.txt") do
|
45
|
+
map_attributes do |values|
|
46
|
+
{
|
47
|
+
id: values[0],
|
48
|
+
name: values[1],
|
49
|
+
population: values[2]
|
52
50
|
}
|
53
51
|
end
|
54
52
|
|
55
|
-
|
53
|
+
# current lines values
|
54
|
+
finder{|attrs|
|
55
|
+
Country.where(id: attrs[:id]).first || Country.new
|
56
|
+
}
|
57
|
+
end
|
58
|
+
```
|
56
59
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
# current lines values
|
68
|
-
finder{|attrs|
|
69
|
-
Country.where(id: attrs[:id]).first || Country.new
|
60
|
+
It can handle remote files and zip files as well.
|
61
|
+
```ruby
|
62
|
+
ingest("http://example.com/a_lot_of_countries.zip") do
|
63
|
+
compressed true
|
64
|
+
map_attributes do |values|
|
65
|
+
{
|
66
|
+
id: values[0],
|
67
|
+
name: values[1],
|
68
|
+
population: values[2]
|
70
69
|
}
|
71
70
|
end
|
72
71
|
|
73
|
-
|
72
|
+
# current lines values
|
73
|
+
finder{|attrs|
|
74
|
+
Country.where(id: attrs[:id]).first || Country.new
|
75
|
+
}
|
76
|
+
end
|
77
|
+
```
|
74
78
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
79
|
+
It can handle XML, JSON, and more...
|
80
|
+
```ruby
|
81
|
+
require 'ingestor/parser/xml'
|
82
|
+
ingest("http://example.com/books.xml") do
|
83
|
+
parser :xml
|
84
|
+
parser_options xpath: '//book'
|
85
|
+
map_attributes do |values|
|
86
|
+
{
|
87
|
+
id: values['id'],
|
88
|
+
title: values['title'],
|
89
|
+
author: {
|
90
|
+
name: values['author']
|
86
91
|
}
|
87
|
-
end
|
88
|
-
|
89
|
-
# current lines values
|
90
|
-
finder{|attrs|
|
91
|
-
Book.where(id: attrs[:id]).first || Book.new
|
92
92
|
}
|
93
|
+
end
|
93
94
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
95
|
+
# current lines values
|
96
|
+
finder{|attrs|
|
97
|
+
Book.where(id: attrs[:id]).first || Book.new
|
98
|
+
}
|
99
|
+
|
100
|
+
processor{|attrs,record|
|
101
|
+
record.update_attributes(attrs)
|
102
|
+
record.reviews.create({
|
103
|
+
stars: 5,
|
104
|
+
comment: "Every book they sell is so great!"
|
105
|
+
})
|
106
|
+
}
|
107
|
+
end
|
108
|
+
```
|
102
109
|
|
103
110
|
CSV Example
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
}
|
131
|
-
end
|
132
|
-
|
133
|
-
# before{|attrs| attrs}
|
134
|
-
|
135
|
-
# Your strategy for finding or instantiating a new object to be handled by the processor block
|
136
|
-
finder{|attrs|
|
137
|
-
Contract.new
|
138
|
-
}
|
139
|
-
|
140
|
-
processor{|attrs,record|
|
141
|
-
# ... custom processor here ...
|
142
|
-
record.update_attributes attrs
|
143
|
-
}
|
144
|
-
|
145
|
-
after{|record|
|
146
|
-
puts "Created: #{record.summary}"
|
111
|
+
```ruby
|
112
|
+
require 'ingestor/parser/csv'
|
113
|
+
ingest "./samples/contracts.csv" do
|
114
|
+
parser :csv
|
115
|
+
|
116
|
+
# all options come directly from Ruby core CSV class
|
117
|
+
parser_options :headers => true,
|
118
|
+
:col_sep => ",",
|
119
|
+
:row_sep => :auto,
|
120
|
+
:quote_char => '"',
|
121
|
+
:field_size_limit => nil,
|
122
|
+
:converters => nil,
|
123
|
+
:unconverted_fields => nil,
|
124
|
+
:return_headers => false,
|
125
|
+
:header_converters => nil,
|
126
|
+
:skip_blanks => false,
|
127
|
+
:force_quotes => false
|
128
|
+
|
129
|
+
# How to map out the columns from text to AR
|
130
|
+
map_attributes do |row|
|
131
|
+
{
|
132
|
+
id: row[0],
|
133
|
+
seller_name: row[1],
|
134
|
+
customer_name: row[2],
|
135
|
+
commencement_date: row[7],
|
136
|
+
termination_date: row[8]
|
147
137
|
}
|
148
|
-
end
|
138
|
+
end
|
139
|
+
|
140
|
+
# before{|attrs| attrs}
|
141
|
+
|
142
|
+
# Your strategy for finding or instantiating a new object to be handled by the processor block
|
143
|
+
finder{|attrs|
|
144
|
+
Contract.new
|
145
|
+
}
|
146
|
+
|
147
|
+
processor{|attrs,record|
|
148
|
+
# ... custom processor here ...
|
149
|
+
record.update_attributes attrs
|
150
|
+
}
|
151
|
+
|
152
|
+
after{|record|
|
153
|
+
puts "Created: #{record.summary}"
|
154
|
+
}
|
155
|
+
end
|
156
|
+
```
|
149
157
|
|
150
158
|
JSON Example
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
159
|
+
```ruby
|
160
|
+
require 'ingestor/parser/json'
|
161
|
+
ingest("http://example.com/people.json") do
|
162
|
+
parser :json
|
163
|
+
parser_options collection: lambda{|document|
|
164
|
+
document['people']
|
165
|
+
}
|
166
|
+
map_attributes do |values|
|
167
|
+
{
|
168
|
+
name: values["first_name"] + " " + values["last_name"],
|
169
|
+
age: values['age'],
|
170
|
+
address: values['address']
|
157
171
|
}
|
158
|
-
|
159
|
-
{
|
160
|
-
name: values["first_name"] + " " + values["last_name"]
|
161
|
-
age: values['age'],
|
162
|
-
address: values['address']
|
163
|
-
}
|
164
|
-
end
|
172
|
+
end
|
165
173
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
174
|
+
# current lines values
|
175
|
+
finder{|attrs|
|
176
|
+
Person.where(name: attrs[:name]).first || Person.new
|
177
|
+
}
|
170
178
|
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
179
|
+
processor{|attrs,record|
|
180
|
+
record.update_attributes(attrs)
|
181
|
+
record.send_junk_mail!
|
182
|
+
}
|
183
|
+
end
|
184
|
+
```
|
176
185
|
|
177
186
|
|
178
187
|
## Advanced Usage
|
@@ -285,7 +294,6 @@ Coming soon...
|
|
285
294
|
|
286
295
|
|
287
296
|
## Todos
|
288
|
-
* HTML processor should use capybara, include a "setup method" for logging in, filling out, etc...
|
289
297
|
* Deprecate plain_text (this was the first thing I created)
|
290
298
|
* rdoc http://rdoc.rubyforge.org/RDoc/Markup.html
|
291
299
|
* Move includes_header to CSV, PlainText
|
data/ingestor.gemspec
CHANGED
@@ -17,9 +17,9 @@ Gem::Specification.new do |gem|
|
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
19
|
gem.add_dependency "docile"
|
20
|
-
gem.add_dependency "rubyzip"
|
20
|
+
gem.add_dependency "rubyzip", '< 1.0.0'
|
21
21
|
gem.add_dependency "thor"
|
22
|
-
gem.add_dependency "nokogiri", '
|
22
|
+
gem.add_dependency "nokogiri", '> 1.5.6'
|
23
23
|
#gem.add_dependency "activesupport", '>= 3.2.0'
|
24
24
|
gem.add_dependency "activesupport"
|
25
25
|
gem.add_dependency 'multi_json', '~> 1.0'
|
data/lib/ingestor.rb
CHANGED
data/lib/ingestor/proxy.rb
CHANGED
data/lib/ingestor/version.rb
CHANGED
data/spec/orm/active_record.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'active_record'
|
3
3
|
|
4
|
-
ActiveRecord::Base.logger = ActiveSupport::BufferedLogger.new('log/test.log')
|
5
4
|
ActiveRecord::Base.establish_connection YAML.load(File.open(File.join(File.dirname(__FILE__), 'database.yml')).read)[ENV['db'] || 'mysql']
|
6
5
|
|
7
6
|
ActiveRecord::Migration.verbose = false
|
@@ -29,5 +28,4 @@ end
|
|
29
28
|
|
30
29
|
class Dummy < ActiveRecord::Base;end;
|
31
30
|
class Country < ActiveRecord::Base
|
32
|
-
attr_protected :secrets
|
33
31
|
end
|
metadata
CHANGED
@@ -1,82 +1,99 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ingestor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.8
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Cory O'Daniel
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-
|
11
|
+
date: 2013-11-13 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: docile
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: rubyzip
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - <
|
31
32
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
33
|
+
version: 1.0.0
|
33
34
|
type: :runtime
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - <
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.0.0
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: thor
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- -
|
45
|
+
- - '>='
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: '0'
|
44
48
|
type: :runtime
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
56
|
name: nokogiri
|
49
|
-
requirement:
|
50
|
-
none: false
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
|
-
- -
|
59
|
+
- - '>'
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: 1.5.6
|
55
62
|
type: :runtime
|
56
63
|
prerelease: false
|
57
|
-
version_requirements:
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>'
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 1.5.6
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: activesupport
|
60
|
-
requirement:
|
61
|
-
none: false
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
62
72
|
requirements:
|
63
|
-
- -
|
73
|
+
- - '>='
|
64
74
|
- !ruby/object:Gem::Version
|
65
75
|
version: '0'
|
66
76
|
type: :runtime
|
67
77
|
prerelease: false
|
68
|
-
version_requirements:
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: multi_json
|
71
|
-
requirement:
|
72
|
-
none: false
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
73
86
|
requirements:
|
74
87
|
- - ~>
|
75
88
|
- !ruby/object:Gem::Version
|
76
89
|
version: '1.0'
|
77
90
|
type: :runtime
|
78
91
|
prerelease: false
|
79
|
-
version_requirements:
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ~>
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.0'
|
80
97
|
description: Ingesting local and remote data files into ActiveRecord
|
81
98
|
email:
|
82
99
|
- github@coryodaniel.com
|
@@ -130,27 +147,26 @@ files:
|
|
130
147
|
- spec/spec_helper.rb
|
131
148
|
homepage: http://github.com/coryodaniel/ingestor
|
132
149
|
licenses: []
|
150
|
+
metadata: {}
|
133
151
|
post_install_message:
|
134
152
|
rdoc_options: []
|
135
153
|
require_paths:
|
136
154
|
- lib
|
137
155
|
required_ruby_version: !ruby/object:Gem::Requirement
|
138
|
-
none: false
|
139
156
|
requirements:
|
140
|
-
- -
|
157
|
+
- - '>='
|
141
158
|
- !ruby/object:Gem::Version
|
142
159
|
version: '0'
|
143
160
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
144
|
-
none: false
|
145
161
|
requirements:
|
146
|
-
- -
|
162
|
+
- - '>='
|
147
163
|
- !ruby/object:Gem::Version
|
148
164
|
version: '0'
|
149
165
|
requirements: []
|
150
166
|
rubyforge_project:
|
151
|
-
rubygems_version:
|
167
|
+
rubygems_version: 2.0.6
|
152
168
|
signing_key:
|
153
|
-
specification_version:
|
169
|
+
specification_version: 4
|
154
170
|
summary: Ingesting local and remote data files into ActiveRecord
|
155
171
|
test_files:
|
156
172
|
- spec/cassettes/remote-zipped-files.yml
|