ingestor 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ebbcd702acac5ab4cc68de903353a86a8b45986c
4
+ data.tar.gz: 6c988a13dacacbbfadb7e549e8533ecde2b2b84c
5
+ SHA512:
6
+ metadata.gz: 26bfdebcf5450d0b27debd828ece5744a06d3020a95bf86dc8cf8db8e27f7e8b5e5b37ae28d67702d58247a2c0fed6c42dbe682b3a95eaf2b1585aa02a06ba1a
7
+ data.tar.gz: cd17329c4865444f1329fdbf213af79bfc5bca430db4061f3f24b06553ec8481882521bab30223cd7e4a6872e5e8b679e1c5964138639705ab5c32db81f8c9f9
data/Gemfile CHANGED
@@ -7,7 +7,6 @@ gem 'rb-fsevent'
7
7
  gem "guard", '1.6.1'
8
8
  gem "guard-bundler"
9
9
  gem "guard-rspec"
10
- gem 'debugger', '1.2.0'
11
10
  gem 'vcr'
12
11
  gem 'fakeweb'
13
12
  #gem 'activerecord', "~>3.2.0"
data/README.md CHANGED
@@ -3,6 +3,9 @@
3
3
  A simple DSL for importing data from text and csv files to ActiveRecord. This was originally designed to
4
4
  continually import changing data from EAN and Geonames.
5
5
 
6
+ Great for parsing JSON, XML, CSV and plain text into ActiveRecord. If you
7
+ need to scrape HTML into ActiveRecord check out [klepto](http://github.com/coryodaniel/klepto).
8
+
6
9
  ## Installation
7
10
 
8
11
  Add this line to your application's Gemfile:
@@ -30,149 +33,155 @@ Add the following to your Rakefile
30
33
  3|United States|315,550,000
31
34
 
32
35
  And an AR Class:
33
-
34
- class Country
35
- attr_accessible :name, :population
36
- end
36
+ ```ruby
37
+ class Country
38
+ attr_accessible :name, :population
39
+ end
40
+ ```
37
41
 
38
42
  Sync the file with AR:
39
-
40
- ingest("path/to/countries.txt") do
41
- map_attributes do |values|
42
- {
43
- id: values[0],
44
- name: values[1],
45
- population: values[2]
46
- }
47
- end
48
-
49
- # current lines values
50
- finder{|attrs|
51
- Country.where(id: attrs[:id]).first || Country.new
43
+ ```ruby
44
+ ingest("path/to/countries.txt") do
45
+ map_attributes do |values|
46
+ {
47
+ id: values[0],
48
+ name: values[1],
49
+ population: values[2]
52
50
  }
53
51
  end
54
52
 
55
- It can handle remote files and zip files as well.
53
+ # current lines values
54
+ finder{|attrs|
55
+ Country.where(id: attrs[:id]).first || Country.new
56
+ }
57
+ end
58
+ ```
56
59
 
57
- ingest("http://example.com/a_lot_of_countries.zip") do
58
- compressed true
59
- map_attributes do |values|
60
- {
61
- id: values[0],
62
- name: values[1],
63
- population: values[2]
64
- }
65
- end
66
-
67
- # current lines values
68
- finder{|attrs|
69
- Country.where(id: attrs[:id]).first || Country.new
60
+ It can handle remote files and zip files as well.
61
+ ```ruby
62
+ ingest("http://example.com/a_lot_of_countries.zip") do
63
+ compressed true
64
+ map_attributes do |values|
65
+ {
66
+ id: values[0],
67
+ name: values[1],
68
+ population: values[2]
70
69
  }
71
70
  end
72
71
 
73
- It can handle XML, JSON, and more...
72
+ # current lines values
73
+ finder{|attrs|
74
+ Country.where(id: attrs[:id]).first || Country.new
75
+ }
76
+ end
77
+ ```
74
78
 
75
- require 'ingestor/parser/xml'
76
- ingest("http://example.com/books.xml") do
77
- parser :xml
78
- parser_options xpath: '//book'
79
- map_attributes do |values|
80
- {
81
- id: values['id'],
82
- title: values['title'],
83
- author: {
84
- name: values['author']
85
- }
79
+ It can handle XML, JSON, and more...
80
+ ```ruby
81
+ require 'ingestor/parser/xml'
82
+ ingest("http://example.com/books.xml") do
83
+ parser :xml
84
+ parser_options xpath: '//book'
85
+ map_attributes do |values|
86
+ {
87
+ id: values['id'],
88
+ title: values['title'],
89
+ author: {
90
+ name: values['author']
86
91
  }
87
- end
88
-
89
- # current lines values
90
- finder{|attrs|
91
- Book.where(id: attrs[:id]).first || Book.new
92
92
  }
93
+ end
93
94
 
94
- processor{|attrs,record|
95
- record.update_attributes(attrs)
96
- record.reviews.create({
97
- stars: 5,
98
- comment: "Every book they sell is so great!"
99
- })
100
- }
101
- end
95
+ # current lines values
96
+ finder{|attrs|
97
+ Book.where(id: attrs[:id]).first || Book.new
98
+ }
99
+
100
+ processor{|attrs,record|
101
+ record.update_attributes(attrs)
102
+ record.reviews.create({
103
+ stars: 5,
104
+ comment: "Every book they sell is so great!"
105
+ })
106
+ }
107
+ end
108
+ ```
102
109
 
103
110
  CSV Example
104
-
105
- require 'ingestor/parser/csv'
106
- ingest "./samples/contracts.csv" do
107
- parser :csv
108
-
109
- # all options come directly from Ruby core CSV class
110
- parser_options :headers => true,
111
- :col_sep => ",",
112
- :row_sep => :auto,
113
- :quote_char => '"',
114
- :field_size_limit => nil,
115
- :converters => nil,
116
- :unconverted_fields => nil,
117
- :return_headers => false,
118
- :header_converters => nil,
119
- :skip_blanks => false,
120
- :force_quotes => false
121
-
122
- # How to map out the columns from text to AR
123
- map_attributes do |row|
124
- {
125
- id: row[0],
126
- seller_name: row[1],
127
- customer_name: row[2],
128
- commencement_date: row[7],
129
- termination_date: row[8]
130
- }
131
- end
132
-
133
- # before{|attrs| attrs}
134
-
135
- # Your strategy for finding or instantiating a new object to be handled by the processor block
136
- finder{|attrs|
137
- Contract.new
138
- }
139
-
140
- processor{|attrs,record|
141
- # ... custom processor here ...
142
- record.update_attributes attrs
143
- }
144
-
145
- after{|record|
146
- puts "Created: #{record.summary}"
111
+ ```ruby
112
+ require 'ingestor/parser/csv'
113
+ ingest "./samples/contracts.csv" do
114
+ parser :csv
115
+
116
+ # all options come directly from Ruby core CSV class
117
+ parser_options :headers => true,
118
+ :col_sep => ",",
119
+ :row_sep => :auto,
120
+ :quote_char => '"',
121
+ :field_size_limit => nil,
122
+ :converters => nil,
123
+ :unconverted_fields => nil,
124
+ :return_headers => false,
125
+ :header_converters => nil,
126
+ :skip_blanks => false,
127
+ :force_quotes => false
128
+
129
+ # How to map out the columns from text to AR
130
+ map_attributes do |row|
131
+ {
132
+ id: row[0],
133
+ seller_name: row[1],
134
+ customer_name: row[2],
135
+ commencement_date: row[7],
136
+ termination_date: row[8]
147
137
  }
148
- end
138
+ end
139
+
140
+ # before{|attrs| attrs}
141
+
142
+ # Your strategy for finding or instantiating a new object to be handled by the processor block
143
+ finder{|attrs|
144
+ Contract.new
145
+ }
146
+
147
+ processor{|attrs,record|
148
+ # ... custom processor here ...
149
+ record.update_attributes attrs
150
+ }
151
+
152
+ after{|record|
153
+ puts "Created: #{record.summary}"
154
+ }
155
+ end
156
+ ```
149
157
 
150
158
  JSON Example
151
-
152
- require 'ingestor/parser/json'
153
- ingest("http://example.com/people.json") do
154
- parser :json
155
- parser_options collection: lambda{|document|
156
- document['people']
159
+ ```ruby
160
+ require 'ingestor/parser/json'
161
+ ingest("http://example.com/people.json") do
162
+ parser :json
163
+ parser_options collection: lambda{|document|
164
+ document['people']
165
+ }
166
+ map_attributes do |values|
167
+ {
168
+ name: values["first_name"] + " " + values["last_name"],
169
+ age: values['age'],
170
+ address: values['address']
157
171
  }
158
- map_attributes do |values|
159
- {
160
- name: values["first_name"] + " " + values["last_name"]
161
- age: values['age'],
162
- address: values['address']
163
- }
164
- end
172
+ end
165
173
 
166
- # current lines values
167
- finder{|attrs|
168
- Person.where(name: attrs[:name]).first || Person.new
169
- }
174
+ # current lines values
175
+ finder{|attrs|
176
+ Person.where(name: attrs[:name]).first || Person.new
177
+ }
170
178
 
171
- processor{|attrs,record|
172
- record.update_attributes(attrs)
173
- record.send_junk_mail!
174
- }
175
- end
179
+ processor{|attrs,record|
180
+ record.update_attributes(attrs)
181
+ record.send_junk_mail!
182
+ }
183
+ end
184
+ ```
176
185
 
177
186
 
178
187
  ## Advanced Usage
@@ -285,7 +294,6 @@ Coming soon...
285
294
 
286
295
 
287
296
  ## Todos
288
- * HTML processor should use capybara, include a "setup method" for logging in, filling out, etc...
289
297
  * Deprecate plain_text (this was the first thing I created)
290
298
  * rdoc http://rdoc.rubyforge.org/RDoc/Markup.html
291
299
  * Move includes_header to CSV, PlainText
data/ingestor.gemspec CHANGED
@@ -17,9 +17,9 @@ Gem::Specification.new do |gem|
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
19
  gem.add_dependency "docile"
20
- gem.add_dependency "rubyzip"
20
+ gem.add_dependency "rubyzip", '< 1.0.0'
21
21
  gem.add_dependency "thor"
22
- gem.add_dependency "nokogiri", '~> 1.5.6'
22
+ gem.add_dependency "nokogiri", '> 1.5.6'
23
23
  #gem.add_dependency "activesupport", '>= 3.2.0'
24
24
  gem.add_dependency "activesupport"
25
25
  gem.add_dependency 'multi_json', '~> 1.0'
data/lib/ingestor.rb CHANGED
@@ -7,8 +7,6 @@ require 'ingestor/version'
7
7
  require 'ingestor/proxy'
8
8
  require 'ingestor/dsl'
9
9
 
10
- #require 'debugger'
11
-
12
10
  module Ingestor
13
11
  LOG = Logger.new(STDOUT)
14
12
  LOG.level = Logger::WARN
@@ -69,7 +69,7 @@ module Ingestor
69
69
  end
70
70
 
71
71
  def default_processor(attrs,record)
72
- record.update_attributes( attrs, :without_protection => true )
72
+ record.update_attributes( attrs )
73
73
  end
74
74
 
75
75
  def load_remote
@@ -1,3 +1,3 @@
1
1
  module Ingestor
2
- VERSION = "0.1.7"
2
+ VERSION = "0.1.8"
3
3
  end
@@ -1,7 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require 'active_record'
3
3
 
4
- ActiveRecord::Base.logger = ActiveSupport::BufferedLogger.new('log/test.log')
5
4
  ActiveRecord::Base.establish_connection YAML.load(File.open(File.join(File.dirname(__FILE__), 'database.yml')).read)[ENV['db'] || 'mysql']
6
5
 
7
6
  ActiveRecord::Migration.verbose = false
@@ -29,5 +28,4 @@ end
29
28
 
30
29
  class Dummy < ActiveRecord::Base;end;
31
30
  class Country < ActiveRecord::Base
32
- attr_protected :secrets
33
31
  end
metadata CHANGED
@@ -1,82 +1,99 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ingestor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
5
- prerelease:
4
+ version: 0.1.8
6
5
  platform: ruby
7
6
  authors:
8
7
  - Cory O'Daniel
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-03-20 00:00:00.000000000 Z
11
+ date: 2013-11-13 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: docile
16
- requirement: &70217015936720 !ruby/object:Gem::Requirement
17
- none: false
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
- - - ! '>='
17
+ - - '>='
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :runtime
23
21
  prerelease: false
24
- version_requirements: *70217015936720
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
25
27
  - !ruby/object:Gem::Dependency
26
28
  name: rubyzip
27
- requirement: &70217015935700 !ruby/object:Gem::Requirement
28
- none: false
29
+ requirement: !ruby/object:Gem::Requirement
29
30
  requirements:
30
- - - ! '>='
31
+ - - <
31
32
  - !ruby/object:Gem::Version
32
- version: '0'
33
+ version: 1.0.0
33
34
  type: :runtime
34
35
  prerelease: false
35
- version_requirements: *70217015935700
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - <
39
+ - !ruby/object:Gem::Version
40
+ version: 1.0.0
36
41
  - !ruby/object:Gem::Dependency
37
42
  name: thor
38
- requirement: &70217015934600 !ruby/object:Gem::Requirement
39
- none: false
43
+ requirement: !ruby/object:Gem::Requirement
40
44
  requirements:
41
- - - ! '>='
45
+ - - '>='
42
46
  - !ruby/object:Gem::Version
43
47
  version: '0'
44
48
  type: :runtime
45
49
  prerelease: false
46
- version_requirements: *70217015934600
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
47
55
  - !ruby/object:Gem::Dependency
48
56
  name: nokogiri
49
- requirement: &70217015932840 !ruby/object:Gem::Requirement
50
- none: false
57
+ requirement: !ruby/object:Gem::Requirement
51
58
  requirements:
52
- - - ~>
59
+ - - '>'
53
60
  - !ruby/object:Gem::Version
54
61
  version: 1.5.6
55
62
  type: :runtime
56
63
  prerelease: false
57
- version_requirements: *70217015932840
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>'
67
+ - !ruby/object:Gem::Version
68
+ version: 1.5.6
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: activesupport
60
- requirement: &70217015931000 !ruby/object:Gem::Requirement
61
- none: false
71
+ requirement: !ruby/object:Gem::Requirement
62
72
  requirements:
63
- - - ! '>='
73
+ - - '>='
64
74
  - !ruby/object:Gem::Version
65
75
  version: '0'
66
76
  type: :runtime
67
77
  prerelease: false
68
- version_requirements: *70217015931000
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: multi_json
71
- requirement: &70217015930280 !ruby/object:Gem::Requirement
72
- none: false
85
+ requirement: !ruby/object:Gem::Requirement
73
86
  requirements:
74
87
  - - ~>
75
88
  - !ruby/object:Gem::Version
76
89
  version: '1.0'
77
90
  type: :runtime
78
91
  prerelease: false
79
- version_requirements: *70217015930280
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ~>
95
+ - !ruby/object:Gem::Version
96
+ version: '1.0'
80
97
  description: Ingesting local and remote data files into ActiveRecord
81
98
  email:
82
99
  - github@coryodaniel.com
@@ -130,27 +147,26 @@ files:
130
147
  - spec/spec_helper.rb
131
148
  homepage: http://github.com/coryodaniel/ingestor
132
149
  licenses: []
150
+ metadata: {}
133
151
  post_install_message:
134
152
  rdoc_options: []
135
153
  require_paths:
136
154
  - lib
137
155
  required_ruby_version: !ruby/object:Gem::Requirement
138
- none: false
139
156
  requirements:
140
- - - ! '>='
157
+ - - '>='
141
158
  - !ruby/object:Gem::Version
142
159
  version: '0'
143
160
  required_rubygems_version: !ruby/object:Gem::Requirement
144
- none: false
145
161
  requirements:
146
- - - ! '>='
162
+ - - '>='
147
163
  - !ruby/object:Gem::Version
148
164
  version: '0'
149
165
  requirements: []
150
166
  rubyforge_project:
151
- rubygems_version: 1.8.10
167
+ rubygems_version: 2.0.6
152
168
  signing_key:
153
- specification_version: 3
169
+ specification_version: 4
154
170
  summary: Ingesting local and remote data files into ActiveRecord
155
171
  test_files:
156
172
  - spec/cassettes/remote-zipped-files.yml