ingestor 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ebbcd702acac5ab4cc68de903353a86a8b45986c
4
+ data.tar.gz: 6c988a13dacacbbfadb7e549e8533ecde2b2b84c
5
+ SHA512:
6
+ metadata.gz: 26bfdebcf5450d0b27debd828ece5744a06d3020a95bf86dc8cf8db8e27f7e8b5e5b37ae28d67702d58247a2c0fed6c42dbe682b3a95eaf2b1585aa02a06ba1a
7
+ data.tar.gz: cd17329c4865444f1329fdbf213af79bfc5bca430db4061f3f24b06553ec8481882521bab30223cd7e4a6872e5e8b679e1c5964138639705ab5c32db81f8c9f9
data/Gemfile CHANGED
@@ -7,7 +7,6 @@ gem 'rb-fsevent'
7
7
  gem "guard", '1.6.1'
8
8
  gem "guard-bundler"
9
9
  gem "guard-rspec"
10
- gem 'debugger', '1.2.0'
11
10
  gem 'vcr'
12
11
  gem 'fakeweb'
13
12
  #gem 'activerecord', "~>3.2.0"
data/README.md CHANGED
@@ -3,6 +3,9 @@
3
3
  A simple DSL for importing data from text and csv files to ActiveRecord. This was originally designed to
4
4
  continually import changing data from EAN and Geonames.
5
5
 
6
+ Great for parsing JSON, XML, CSV and plain text into ActiveRecord. If you
7
+ need to scrape HTML into ActiveRecord, check out [klepto](http://github.com/coryodaniel/klepto).
8
+
6
9
  ## Installation
7
10
 
8
11
  Add this line to your application's Gemfile:
@@ -30,149 +33,155 @@ Add the following to your Rakefile
30
33
  3|United States|315,550,000
31
34
 
32
35
  And an AR Class:
33
-
34
- class Country
35
- attr_accessible :name, :population
36
- end
36
+ ```ruby
37
+ class Country
38
+ attr_accessible :name, :population
39
+ end
40
+ ```
37
41
 
38
42
  Sync the file with AR:
39
-
40
- ingest("path/to/countries.txt") do
41
- map_attributes do |values|
42
- {
43
- id: values[0],
44
- name: values[1],
45
- population: values[2]
46
- }
47
- end
48
-
49
- # current lines values
50
- finder{|attrs|
51
- Country.where(id: attrs[:id]).first || Country.new
43
+ ```ruby
44
+ ingest("path/to/countries.txt") do
45
+ map_attributes do |values|
46
+ {
47
+ id: values[0],
48
+ name: values[1],
49
+ population: values[2]
52
50
  }
53
51
  end
54
52
 
55
- It can handle remote files and zip files as well.
53
+ # current lines values
54
+ finder{|attrs|
55
+ Country.where(id: attrs[:id]).first || Country.new
56
+ }
57
+ end
58
+ ```
56
59
 
57
- ingest("http://example.com/a_lot_of_countries.zip") do
58
- compressed true
59
- map_attributes do |values|
60
- {
61
- id: values[0],
62
- name: values[1],
63
- population: values[2]
64
- }
65
- end
66
-
67
- # current lines values
68
- finder{|attrs|
69
- Country.where(id: attrs[:id]).first || Country.new
60
+ It can handle remote files and zip files as well.
61
+ ```ruby
62
+ ingest("http://example.com/a_lot_of_countries.zip") do
63
+ compressed true
64
+ map_attributes do |values|
65
+ {
66
+ id: values[0],
67
+ name: values[1],
68
+ population: values[2]
70
69
  }
71
70
  end
72
71
 
73
- It can handle XML, JSON, and more...
72
+ # current lines values
73
+ finder{|attrs|
74
+ Country.where(id: attrs[:id]).first || Country.new
75
+ }
76
+ end
77
+ ```
74
78
 
75
- require 'ingestor/parser/xml'
76
- ingest("http://example.com/books.xml") do
77
- parser :xml
78
- parser_options xpath: '//book'
79
- map_attributes do |values|
80
- {
81
- id: values['id'],
82
- title: values['title'],
83
- author: {
84
- name: values['author']
85
- }
79
+ It can handle XML, JSON, and more...
80
+ ```ruby
81
+ require 'ingestor/parser/xml'
82
+ ingest("http://example.com/books.xml") do
83
+ parser :xml
84
+ parser_options xpath: '//book'
85
+ map_attributes do |values|
86
+ {
87
+ id: values['id'],
88
+ title: values['title'],
89
+ author: {
90
+ name: values['author']
86
91
  }
87
- end
88
-
89
- # current lines values
90
- finder{|attrs|
91
- Book.where(id: attrs[:id]).first || Book.new
92
92
  }
93
+ end
93
94
 
94
- processor{|attrs,record|
95
- record.update_attributes(attrs)
96
- record.reviews.create({
97
- stars: 5,
98
- comment: "Every book they sell is so great!"
99
- })
100
- }
101
- end
95
+ # current lines values
96
+ finder{|attrs|
97
+ Book.where(id: attrs[:id]).first || Book.new
98
+ }
99
+
100
+ processor{|attrs,record|
101
+ record.update_attributes(attrs)
102
+ record.reviews.create({
103
+ stars: 5,
104
+ comment: "Every book they sell is so great!"
105
+ })
106
+ }
107
+ end
108
+ ```
102
109
 
103
110
  CSV Example
104
-
105
- require 'ingestor/parser/csv'
106
- ingest "./samples/contracts.csv" do
107
- parser :csv
108
-
109
- # all options come directly from Ruby core CSV class
110
- parser_options :headers => true,
111
- :col_sep => ",",
112
- :row_sep => :auto,
113
- :quote_char => '"',
114
- :field_size_limit => nil,
115
- :converters => nil,
116
- :unconverted_fields => nil,
117
- :return_headers => false,
118
- :header_converters => nil,
119
- :skip_blanks => false,
120
- :force_quotes => false
121
-
122
- # How to map out the columns from text to AR
123
- map_attributes do |row|
124
- {
125
- id: row[0],
126
- seller_name: row[1],
127
- customer_name: row[2],
128
- commencement_date: row[7],
129
- termination_date: row[8]
130
- }
131
- end
132
-
133
- # before{|attrs| attrs}
134
-
135
- # Your strategy for finding or instantiating a new object to be handled by the processor block
136
- finder{|attrs|
137
- Contract.new
138
- }
139
-
140
- processor{|attrs,record|
141
- # ... custom processor here ...
142
- record.update_attributes attrs
143
- }
144
-
145
- after{|record|
146
- puts "Created: #{record.summary}"
111
+ ```ruby
112
+ require 'ingestor/parser/csv'
113
+ ingest "./samples/contracts.csv" do
114
+ parser :csv
115
+
116
+ # all options come directly from Ruby core CSV class
117
+ parser_options :headers => true,
118
+ :col_sep => ",",
119
+ :row_sep => :auto,
120
+ :quote_char => '"',
121
+ :field_size_limit => nil,
122
+ :converters => nil,
123
+ :unconverted_fields => nil,
124
+ :return_headers => false,
125
+ :header_converters => nil,
126
+ :skip_blanks => false,
127
+ :force_quotes => false
128
+
129
+ # How to map out the columns from text to AR
130
+ map_attributes do |row|
131
+ {
132
+ id: row[0],
133
+ seller_name: row[1],
134
+ customer_name: row[2],
135
+ commencement_date: row[7],
136
+ termination_date: row[8]
147
137
  }
148
- end
138
+ end
139
+
140
+ # before{|attrs| attrs}
141
+
142
+ # Your strategy for finding or instantiating a new object to be handled by the processor block
143
+ finder{|attrs|
144
+ Contract.new
145
+ }
146
+
147
+ processor{|attrs,record|
148
+ # ... custom processor here ...
149
+ record.update_attributes attrs
150
+ }
151
+
152
+ after{|record|
153
+ puts "Created: #{record.summary}"
154
+ }
155
+ end
156
+ ```
149
157
 
150
158
  JSON Example
151
-
152
- require 'ingestor/parser/json'
153
- ingest("http://example.com/people.json") do
154
- parser :json
155
- parser_options collection: lambda{|document|
156
- document['people']
159
+ ```ruby
160
+ require 'ingestor/parser/json'
161
+ ingest("http://example.com/people.json") do
162
+ parser :json
163
+ parser_options collection: lambda{|document|
164
+ document['people']
165
+ }
166
+ map_attributes do |values|
167
+ {
168
+ name: values["first_name"] + " " + values["last_name"],
169
+ age: values['age'],
170
+ address: values['address']
157
171
  }
158
- map_attributes do |values|
159
- {
160
- name: values["first_name"] + " " + values["last_name"]
161
- age: values['age'],
162
- address: values['address']
163
- }
164
- end
172
+ end
165
173
 
166
- # current lines values
167
- finder{|attrs|
168
- Person.where(name: attrs[:name]).first || Person.new
169
- }
174
+ # current lines values
175
+ finder{|attrs|
176
+ Person.where(name: attrs[:name]).first || Person.new
177
+ }
170
178
 
171
- processor{|attrs,record|
172
- record.update_attributes(attrs)
173
- record.send_junk_mail!
174
- }
175
- end
179
+ processor{|attrs,record|
180
+ record.update_attributes(attrs)
181
+ record.send_junk_mail!
182
+ }
183
+ end
184
+ ```
176
185
 
177
186
 
178
187
  ## Advanced Usage
@@ -285,7 +294,6 @@ Coming soon...
285
294
 
286
295
 
287
296
  ## Todos
288
- * HTML processor should use capybara, include a "setup method" for logging in, filling out, etc...
289
297
  * Deprecate plain_text (this was the first thing I created)
290
298
  * rdoc http://rdoc.rubyforge.org/RDoc/Markup.html
291
299
  * Move includes_header to CSV, PlainText
data/ingestor.gemspec CHANGED
@@ -17,9 +17,9 @@ Gem::Specification.new do |gem|
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
19
  gem.add_dependency "docile"
20
- gem.add_dependency "rubyzip"
20
+ gem.add_dependency "rubyzip", '< 1.0.0'
21
21
  gem.add_dependency "thor"
22
- gem.add_dependency "nokogiri", '~> 1.5.6'
22
+ gem.add_dependency "nokogiri", '> 1.5.6'
23
23
  #gem.add_dependency "activesupport", '>= 3.2.0'
24
24
  gem.add_dependency "activesupport"
25
25
  gem.add_dependency 'multi_json', '~> 1.0'
data/lib/ingestor.rb CHANGED
@@ -7,8 +7,6 @@ require 'ingestor/version'
7
7
  require 'ingestor/proxy'
8
8
  require 'ingestor/dsl'
9
9
 
10
- #require 'debugger'
11
-
12
10
  module Ingestor
13
11
  LOG = Logger.new(STDOUT)
14
12
  LOG.level = Logger::WARN
@@ -69,7 +69,7 @@ module Ingestor
69
69
  end
70
70
 
71
71
  def default_processor(attrs,record)
72
- record.update_attributes( attrs, :without_protection => true )
72
+ record.update_attributes( attrs )
73
73
  end
74
74
 
75
75
  def load_remote
@@ -1,3 +1,3 @@
1
1
  module Ingestor
2
- VERSION = "0.1.7"
2
+ VERSION = "0.1.8"
3
3
  end
@@ -1,7 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require 'active_record'
3
3
 
4
- ActiveRecord::Base.logger = ActiveSupport::BufferedLogger.new('log/test.log')
5
4
  ActiveRecord::Base.establish_connection YAML.load(File.open(File.join(File.dirname(__FILE__), 'database.yml')).read)[ENV['db'] || 'mysql']
6
5
 
7
6
  ActiveRecord::Migration.verbose = false
@@ -29,5 +28,4 @@ end
29
28
 
30
29
  class Dummy < ActiveRecord::Base;end;
31
30
  class Country < ActiveRecord::Base
32
- attr_protected :secrets
33
31
  end
metadata CHANGED
@@ -1,82 +1,99 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ingestor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
5
- prerelease:
4
+ version: 0.1.8
6
5
  platform: ruby
7
6
  authors:
8
7
  - Cory O'Daniel
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-03-20 00:00:00.000000000 Z
11
+ date: 2013-11-13 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: docile
16
- requirement: &70217015936720 !ruby/object:Gem::Requirement
17
- none: false
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
- - - ! '>='
17
+ - - '>='
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :runtime
23
21
  prerelease: false
24
- version_requirements: *70217015936720
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
25
27
  - !ruby/object:Gem::Dependency
26
28
  name: rubyzip
27
- requirement: &70217015935700 !ruby/object:Gem::Requirement
28
- none: false
29
+ requirement: !ruby/object:Gem::Requirement
29
30
  requirements:
30
- - - ! '>='
31
+ - - <
31
32
  - !ruby/object:Gem::Version
32
- version: '0'
33
+ version: 1.0.0
33
34
  type: :runtime
34
35
  prerelease: false
35
- version_requirements: *70217015935700
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - <
39
+ - !ruby/object:Gem::Version
40
+ version: 1.0.0
36
41
  - !ruby/object:Gem::Dependency
37
42
  name: thor
38
- requirement: &70217015934600 !ruby/object:Gem::Requirement
39
- none: false
43
+ requirement: !ruby/object:Gem::Requirement
40
44
  requirements:
41
- - - ! '>='
45
+ - - '>='
42
46
  - !ruby/object:Gem::Version
43
47
  version: '0'
44
48
  type: :runtime
45
49
  prerelease: false
46
- version_requirements: *70217015934600
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
47
55
  - !ruby/object:Gem::Dependency
48
56
  name: nokogiri
49
- requirement: &70217015932840 !ruby/object:Gem::Requirement
50
- none: false
57
+ requirement: !ruby/object:Gem::Requirement
51
58
  requirements:
52
- - - ~>
59
+ - - '>'
53
60
  - !ruby/object:Gem::Version
54
61
  version: 1.5.6
55
62
  type: :runtime
56
63
  prerelease: false
57
- version_requirements: *70217015932840
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>'
67
+ - !ruby/object:Gem::Version
68
+ version: 1.5.6
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: activesupport
60
- requirement: &70217015931000 !ruby/object:Gem::Requirement
61
- none: false
71
+ requirement: !ruby/object:Gem::Requirement
62
72
  requirements:
63
- - - ! '>='
73
+ - - '>='
64
74
  - !ruby/object:Gem::Version
65
75
  version: '0'
66
76
  type: :runtime
67
77
  prerelease: false
68
- version_requirements: *70217015931000
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: multi_json
71
- requirement: &70217015930280 !ruby/object:Gem::Requirement
72
- none: false
85
+ requirement: !ruby/object:Gem::Requirement
73
86
  requirements:
74
87
  - - ~>
75
88
  - !ruby/object:Gem::Version
76
89
  version: '1.0'
77
90
  type: :runtime
78
91
  prerelease: false
79
- version_requirements: *70217015930280
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ~>
95
+ - !ruby/object:Gem::Version
96
+ version: '1.0'
80
97
  description: Ingesting local and remote data files into ActiveRecord
81
98
  email:
82
99
  - github@coryodaniel.com
@@ -130,27 +147,26 @@ files:
130
147
  - spec/spec_helper.rb
131
148
  homepage: http://github.com/coryodaniel/ingestor
132
149
  licenses: []
150
+ metadata: {}
133
151
  post_install_message:
134
152
  rdoc_options: []
135
153
  require_paths:
136
154
  - lib
137
155
  required_ruby_version: !ruby/object:Gem::Requirement
138
- none: false
139
156
  requirements:
140
- - - ! '>='
157
+ - - '>='
141
158
  - !ruby/object:Gem::Version
142
159
  version: '0'
143
160
  required_rubygems_version: !ruby/object:Gem::Requirement
144
- none: false
145
161
  requirements:
146
- - - ! '>='
162
+ - - '>='
147
163
  - !ruby/object:Gem::Version
148
164
  version: '0'
149
165
  requirements: []
150
166
  rubyforge_project:
151
- rubygems_version: 1.8.10
167
+ rubygems_version: 2.0.6
152
168
  signing_key:
153
- specification_version: 3
169
+ specification_version: 4
154
170
  summary: Ingesting local and remote data files into ActiveRecord
155
171
  test_files:
156
172
  - spec/cassettes/remote-zipped-files.yml