ingestor 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +0 -1
- data/README.md +131 -123
- data/ingestor.gemspec +2 -2
- data/lib/ingestor.rb +0 -2
- data/lib/ingestor/proxy.rb +1 -1
- data/lib/ingestor/version.rb +1 -1
- data/spec/orm/active_record.rb +0 -2
- metadata +49 -33
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ebbcd702acac5ab4cc68de903353a86a8b45986c
|
4
|
+
data.tar.gz: 6c988a13dacacbbfadb7e549e8533ecde2b2b84c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 26bfdebcf5450d0b27debd828ece5744a06d3020a95bf86dc8cf8db8e27f7e8b5e5b37ae28d67702d58247a2c0fed6c42dbe682b3a95eaf2b1585aa02a06ba1a
|
7
|
+
data.tar.gz: cd17329c4865444f1329fdbf213af79bfc5bca430db4061f3f24b06553ec8481882521bab30223cd7e4a6872e5e8b679e1c5964138639705ab5c32db81f8c9f9
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -3,6 +3,9 @@
|
|
3
3
|
A simple DSL for importing data from text and csv files to ActiveRecord. This was originally designed to
|
4
4
|
continually import changing data from EAN and Geonames.
|
5
5
|
|
6
|
+
Great for parsing JSON, XML, CSV and plain text into ActiveRecord; if you
|
7
|
+
need to scrape HTML into ActiveRecord check out [klepto](http://github.com/coryodaniel/klepto).
|
8
|
+
|
6
9
|
## Installation
|
7
10
|
|
8
11
|
Add this line to your application's Gemfile:
|
@@ -30,149 +33,155 @@ Add the following to your Rakefile
|
|
30
33
|
3|United States|315,550,000
|
31
34
|
|
32
35
|
And an AR Class:
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
36
|
+
```ruby
|
37
|
+
class Country
|
38
|
+
attr_accessible :name, :population
|
39
|
+
end
|
40
|
+
```
|
37
41
|
|
38
42
|
Sync the file with AR:
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
}
|
47
|
-
end
|
48
|
-
|
49
|
-
# current lines values
|
50
|
-
finder{|attrs|
|
51
|
-
Country.where(id: attrs[:id]).first || Country.new
|
43
|
+
```ruby
|
44
|
+
ingest("path/to/countries.txt") do
|
45
|
+
map_attributes do |values|
|
46
|
+
{
|
47
|
+
id: values[0],
|
48
|
+
name: values[1],
|
49
|
+
population: values[2]
|
52
50
|
}
|
53
51
|
end
|
54
52
|
|
55
|
-
|
53
|
+
# current lines values
|
54
|
+
finder{|attrs|
|
55
|
+
Country.where(id: attrs[:id]).first || Country.new
|
56
|
+
}
|
57
|
+
end
|
58
|
+
```
|
56
59
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
# current lines values
|
68
|
-
finder{|attrs|
|
69
|
-
Country.where(id: attrs[:id]).first || Country.new
|
60
|
+
It can handle remote files and zip files as well.
|
61
|
+
```ruby
|
62
|
+
ingest("http://example.com/a_lot_of_countries.zip") do
|
63
|
+
compressed true
|
64
|
+
map_attributes do |values|
|
65
|
+
{
|
66
|
+
id: values[0],
|
67
|
+
name: values[1],
|
68
|
+
population: values[2]
|
70
69
|
}
|
71
70
|
end
|
72
71
|
|
73
|
-
|
72
|
+
# current lines values
|
73
|
+
finder{|attrs|
|
74
|
+
Country.where(id: attrs[:id]).first || Country.new
|
75
|
+
}
|
76
|
+
end
|
77
|
+
```
|
74
78
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
79
|
+
It can handle XML, JSON, and more...
|
80
|
+
```ruby
|
81
|
+
require 'ingestor/parser/xml'
|
82
|
+
ingest("http://example.com/books.xml") do
|
83
|
+
parser :xml
|
84
|
+
parser_options xpath: '//book'
|
85
|
+
map_attributes do |values|
|
86
|
+
{
|
87
|
+
id: values['id'],
|
88
|
+
title: values['title'],
|
89
|
+
author: {
|
90
|
+
name: values['author']
|
86
91
|
}
|
87
|
-
end
|
88
|
-
|
89
|
-
# current lines values
|
90
|
-
finder{|attrs|
|
91
|
-
Book.where(id: attrs[:id]).first || Book.new
|
92
92
|
}
|
93
|
+
end
|
93
94
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
95
|
+
# current lines values
|
96
|
+
finder{|attrs|
|
97
|
+
Book.where(id: attrs[:id]).first || Book.new
|
98
|
+
}
|
99
|
+
|
100
|
+
processor{|attrs,record|
|
101
|
+
record.update_attributes(attrs)
|
102
|
+
record.reviews.create({
|
103
|
+
stars: 5,
|
104
|
+
comment: "Every book they sell is so great!"
|
105
|
+
})
|
106
|
+
}
|
107
|
+
end
|
108
|
+
```
|
102
109
|
|
103
110
|
CSV Example
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
}
|
131
|
-
end
|
132
|
-
|
133
|
-
# before{|attrs| attrs}
|
134
|
-
|
135
|
-
# Your strategy for finding or instantiating a new object to be handled by the processor block
|
136
|
-
finder{|attrs|
|
137
|
-
Contract.new
|
138
|
-
}
|
139
|
-
|
140
|
-
processor{|attrs,record|
|
141
|
-
# ... custom processor here ...
|
142
|
-
record.update_attributes attrs
|
143
|
-
}
|
144
|
-
|
145
|
-
after{|record|
|
146
|
-
puts "Created: #{record.summary}"
|
111
|
+
```ruby
|
112
|
+
require 'ingestor/parser/csv'
|
113
|
+
ingest "./samples/contracts.csv" do
|
114
|
+
parser :csv
|
115
|
+
|
116
|
+
# all options come directly from Ruby core CSV class
|
117
|
+
parser_options :headers => true,
|
118
|
+
:col_sep => ",",
|
119
|
+
:row_sep => :auto,
|
120
|
+
:quote_char => '"',
|
121
|
+
:field_size_limit => nil,
|
122
|
+
:converters => nil,
|
123
|
+
:unconverted_fields => nil,
|
124
|
+
:return_headers => false,
|
125
|
+
:header_converters => nil,
|
126
|
+
:skip_blanks => false,
|
127
|
+
:force_quotes => false
|
128
|
+
|
129
|
+
# How to map out the columns from text to AR
|
130
|
+
map_attributes do |row|
|
131
|
+
{
|
132
|
+
id: row[0],
|
133
|
+
seller_name: row[1],
|
134
|
+
customer_name: row[2],
|
135
|
+
commencement_date: row[7],
|
136
|
+
termination_date: row[8]
|
147
137
|
}
|
148
|
-
end
|
138
|
+
end
|
139
|
+
|
140
|
+
# before{|attrs| attrs}
|
141
|
+
|
142
|
+
# Your strategy for finding or instantiating a new object to be handled by the processor block
|
143
|
+
finder{|attrs|
|
144
|
+
Contract.new
|
145
|
+
}
|
146
|
+
|
147
|
+
processor{|attrs,record|
|
148
|
+
# ... custom processor here ...
|
149
|
+
record.update_attributes attrs
|
150
|
+
}
|
151
|
+
|
152
|
+
after{|record|
|
153
|
+
puts "Created: #{record.summary}"
|
154
|
+
}
|
155
|
+
end
|
156
|
+
```
|
149
157
|
|
150
158
|
JSON Example
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
159
|
+
```ruby
|
160
|
+
require 'ingestor/parser/json'
|
161
|
+
ingest("http://example.com/people.json") do
|
162
|
+
parser :json
|
163
|
+
parser_options collection: lambda{|document|
|
164
|
+
document['people']
|
165
|
+
}
|
166
|
+
map_attributes do |values|
|
167
|
+
{
|
168
|
+
name: values["first_name"] + " " + values["last_name"],
|
169
|
+
age: values['age'],
|
170
|
+
address: values['address']
|
157
171
|
}
|
158
|
-
|
159
|
-
{
|
160
|
-
name: values["first_name"] + " " + values["last_name"]
|
161
|
-
age: values['age'],
|
162
|
-
address: values['address']
|
163
|
-
}
|
164
|
-
end
|
172
|
+
end
|
165
173
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
174
|
+
# current lines values
|
175
|
+
finder{|attrs|
|
176
|
+
Person.where(name: attrs[:name]).first || Person.new
|
177
|
+
}
|
170
178
|
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
179
|
+
processor{|attrs,record|
|
180
|
+
record.update_attributes(attrs)
|
181
|
+
record.send_junk_mail!
|
182
|
+
}
|
183
|
+
end
|
184
|
+
```
|
176
185
|
|
177
186
|
|
178
187
|
## Advanced Usage
|
@@ -285,7 +294,6 @@ Coming soon...
|
|
285
294
|
|
286
295
|
|
287
296
|
## Todos
|
288
|
-
* HTML processor should use capybara, include a "setup method" for logging in, filling out, etc...
|
289
297
|
* Deprecate plain_text (this was the first thing I created)
|
290
298
|
* rdoc http://rdoc.rubyforge.org/RDoc/Markup.html
|
291
299
|
* Move includes_header to CSV, PlainText
|
data/ingestor.gemspec
CHANGED
@@ -17,9 +17,9 @@ Gem::Specification.new do |gem|
|
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
19
|
gem.add_dependency "docile"
|
20
|
-
gem.add_dependency "rubyzip"
|
20
|
+
gem.add_dependency "rubyzip", '< 1.0.0'
|
21
21
|
gem.add_dependency "thor"
|
22
|
-
gem.add_dependency "nokogiri", '
|
22
|
+
gem.add_dependency "nokogiri", '> 1.5.6'
|
23
23
|
#gem.add_dependency "activesupport", '>= 3.2.0'
|
24
24
|
gem.add_dependency "activesupport"
|
25
25
|
gem.add_dependency 'multi_json', '~> 1.0'
|
data/lib/ingestor.rb
CHANGED
data/lib/ingestor/proxy.rb
CHANGED
data/lib/ingestor/version.rb
CHANGED
data/spec/orm/active_record.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'active_record'
|
3
3
|
|
4
|
-
ActiveRecord::Base.logger = ActiveSupport::BufferedLogger.new('log/test.log')
|
5
4
|
ActiveRecord::Base.establish_connection YAML.load(File.open(File.join(File.dirname(__FILE__), 'database.yml')).read)[ENV['db'] || 'mysql']
|
6
5
|
|
7
6
|
ActiveRecord::Migration.verbose = false
|
@@ -29,5 +28,4 @@ end
|
|
29
28
|
|
30
29
|
class Dummy < ActiveRecord::Base;end;
|
31
30
|
class Country < ActiveRecord::Base
|
32
|
-
attr_protected :secrets
|
33
31
|
end
|
metadata
CHANGED
@@ -1,82 +1,99 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ingestor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.8
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Cory O'Daniel
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-
|
11
|
+
date: 2013-11-13 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: docile
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: rubyzip
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - <
|
31
32
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
33
|
+
version: 1.0.0
|
33
34
|
type: :runtime
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - <
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.0.0
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: thor
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- -
|
45
|
+
- - '>='
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: '0'
|
44
48
|
type: :runtime
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
56
|
name: nokogiri
|
49
|
-
requirement:
|
50
|
-
none: false
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
|
-
- -
|
59
|
+
- - '>'
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: 1.5.6
|
55
62
|
type: :runtime
|
56
63
|
prerelease: false
|
57
|
-
version_requirements:
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>'
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 1.5.6
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: activesupport
|
60
|
-
requirement:
|
61
|
-
none: false
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
62
72
|
requirements:
|
63
|
-
- -
|
73
|
+
- - '>='
|
64
74
|
- !ruby/object:Gem::Version
|
65
75
|
version: '0'
|
66
76
|
type: :runtime
|
67
77
|
prerelease: false
|
68
|
-
version_requirements:
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: multi_json
|
71
|
-
requirement:
|
72
|
-
none: false
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
73
86
|
requirements:
|
74
87
|
- - ~>
|
75
88
|
- !ruby/object:Gem::Version
|
76
89
|
version: '1.0'
|
77
90
|
type: :runtime
|
78
91
|
prerelease: false
|
79
|
-
version_requirements:
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ~>
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.0'
|
80
97
|
description: Ingesting local and remote data files into ActiveRecord
|
81
98
|
email:
|
82
99
|
- github@coryodaniel.com
|
@@ -130,27 +147,26 @@ files:
|
|
130
147
|
- spec/spec_helper.rb
|
131
148
|
homepage: http://github.com/coryodaniel/ingestor
|
132
149
|
licenses: []
|
150
|
+
metadata: {}
|
133
151
|
post_install_message:
|
134
152
|
rdoc_options: []
|
135
153
|
require_paths:
|
136
154
|
- lib
|
137
155
|
required_ruby_version: !ruby/object:Gem::Requirement
|
138
|
-
none: false
|
139
156
|
requirements:
|
140
|
-
- -
|
157
|
+
- - '>='
|
141
158
|
- !ruby/object:Gem::Version
|
142
159
|
version: '0'
|
143
160
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
144
|
-
none: false
|
145
161
|
requirements:
|
146
|
-
- -
|
162
|
+
- - '>='
|
147
163
|
- !ruby/object:Gem::Version
|
148
164
|
version: '0'
|
149
165
|
requirements: []
|
150
166
|
rubyforge_project:
|
151
|
-
rubygems_version:
|
167
|
+
rubygems_version: 2.0.6
|
152
168
|
signing_key:
|
153
|
-
specification_version:
|
169
|
+
specification_version: 4
|
154
170
|
summary: Ingesting local and remote data files into ActiveRecord
|
155
171
|
test_files:
|
156
172
|
- spec/cassettes/remote-zipped-files.yml
|