ingestor 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -10,9 +10,7 @@ gem "guard-rspec"
10
10
  gem 'debugger', '1.2.0'
11
11
  gem 'vcr'
12
12
  gem 'fakeweb'
13
- gem 'nokogiri'
14
13
  gem 'activerecord', "~>3.2.0"
15
- gem 'activesupport', "~>3.2.0"
16
14
  gem 'mysql2'
17
15
  gem 'rb-fsevent'
18
16
  gem 'ruby_gntp'
data/README.md CHANGED
@@ -99,6 +99,33 @@ Add the following to your Rakefile
99
99
  }
100
100
  end
101
101
 
102
+ JSON Example
103
+
104
+ ingest("http://example.com/people.json") do
105
+ parser :json
106
+ parser_options collection: lambda{|document|
107
+ document['people']
108
+ }
109
+ map_attributes do |values|
110
+ {
111
+ name: values["first_name"] + " " + values["last_name"],
112
+ age: values['age'],
113
+ address: values['address']
114
+ }
115
+ end
116
+
117
+ # attrs holds the mapped values for the current entry
118
+ finder{|attrs|
119
+ Person.where(name: attrs[:name]).first || Person.new
120
+ }
121
+
122
+ processor{|attrs,record|
123
+ record.update_attributes(attrs)
124
+ record.send_junk_mail!
125
+ }
126
+ end
127
+
128
+
102
129
  ## Advanced Usage
103
130
  DSL Options
104
131
  * parser - the parser to use on the file
@@ -178,7 +205,13 @@ Writing parsers is simple ([see examples](https://github.com/coryodaniel/ingesto
178
205
  * Default libxml2 best guess
179
206
 
180
207
  ### JSON Parser
181
- Coming soon...
208
+ Parses a JSON document
209
+
210
+ Options
211
+ * collection - receives the document and narrows it down to the collection you are interested in
212
+ * Proc(Hash)
213
+ * Returns Hash | Array
214
+ * Required
182
215
 
183
216
  ### CSV Parser
184
217
  Coming soon...
@@ -203,9 +236,10 @@ Coming soon...
203
236
 
204
237
 
205
238
  ## Todos
206
- * rdoc lib/ folder
239
+ * rdoc http://rdoc.rubyforge.org/RDoc/Markup.html
207
240
  * Move includes_header to CSV, PlainText
208
241
  * Mongoid Support
209
242
  * sort/limit options
243
+ * configure travis
210
244
  * A way to sample a file without building an ingestor first
211
245
  * bin/ingestor --sample --path=./my.xml --parser xml --parser_options_xpath '//book'
File without changes
@@ -0,0 +1,57 @@
1
+ #! /usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'bundler/setup'
4
+ require 'ingestor'
5
+ require 'ingestor/parser/json'
6
+
7
+ # Set up a bogus active model
8
+ require 'active_model'
9
+ class Color
10
+ include ActiveModel::Naming
11
+ def persisted?
12
+ true
13
+ end
14
+ # Make a fake active model
15
+ attr_accessor :name, :hex
16
+ def update_attributes(attributes = {})
17
+ attributes.each do |name, value|
18
+ send("#{name}=", value)
19
+ end
20
+ true
21
+ end
22
+ end
23
+ # end bogusness
24
+
25
+ ingest "./samples/colors.json" do
26
+ parser :json
27
+
28
+ # Receives the full document and narrows it down to the collection to process.
29
+ parser_options collection: lambda{|document|
30
+ document
31
+ }
32
+
33
+ # How to map out the columns from text to AR
34
+ map_attributes do |values|
35
+ {
36
+ name: values['color'],
37
+ hex: values['value']
38
+ }
39
+ end
40
+
41
+ # before{|attrs| values}
42
+
43
+ # Your strategy for finding or instantiating a new object to be handled by the processor block
44
+ finder{|attrs|
45
+ # Book.find( attrs['id'] ) || Book.new
46
+ Color.new
47
+ }
48
+
49
+ processor{|attrs,record|
50
+ # ... custom processor here ...
51
+ record.update_attributes attrs
52
+ }
53
+
54
+ after{|record|
55
+ puts "Created: #{record.name}"
56
+ }
57
+ end
@@ -0,0 +1,55 @@
1
+ #! /usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'bundler/setup'
4
+ require 'ingestor'
5
+ require 'ingestor/parser/json'
6
+
7
+ # Set up a bogus active model
8
+ require 'active_model'
9
+ class Person
10
+ include ActiveModel::Naming
11
+ def persisted?
12
+ true
13
+ end
14
+ # Make a fake active model
15
+ attr_accessor :id, :first_name, :last_name, :age, :address
16
+ def update_attributes(attributes = {})
17
+ attributes.each do |name, value|
18
+ send("#{name}=", value)
19
+ end
20
+ true
21
+ end
22
+ end
23
+ # end bogusness
24
+
25
+ ingest "./samples/people.json" do
26
+ parser :json
27
+
28
+ # Receives the full document and narrows it down to the collection to process.
29
+ parser_options collection: lambda{|document|
30
+ document['people']
31
+ }
32
+ #sample true
33
+
34
+ # How to map out the columns from text to AR
35
+ map_attributes do |values|
36
+ values
37
+ end
38
+
39
+ # before{|attrs| values}
40
+
41
+ # Your strategy for finding or instantiating a new object to be handled by the processor block
42
+ finder{|attrs|
43
+ # Book.find( attrs['id'] ) || Book.new
44
+ Person.new
45
+ }
46
+
47
+ processor{|attrs,record|
48
+ # ... custom processor here ...
49
+ record.update_attributes attrs
50
+ }
51
+
52
+ after{|record|
53
+ puts "Created: #{record.first_name} @ #{record.address}"
54
+ }
55
+ end
data/ingestor.gemspec CHANGED
@@ -19,5 +19,7 @@ Gem::Specification.new do |gem|
19
19
  gem.add_dependency "docile"
20
20
  gem.add_dependency "rubyzip"
21
21
  gem.add_dependency "thor"
22
+ gem.add_dependency "nokogiri", '~> 1.5.6'
22
23
  gem.add_dependency "activesupport", '>= 3.2.0'
24
+ gem.add_dependency 'multi_json', '~> 1.0'
23
25
  end
@@ -1,6 +1,31 @@
1
+ require 'multi_json'
2
+ require 'debugger'
1
3
  module Ingestor
2
4
  module Parser
3
5
  class Json
6
+ include Ingestor::Parser::Base
7
+ def options(opts={})
8
+ @options = {
9
+ collection: nil
10
+ }.merge(opts)
11
+ end
12
+
13
+ def sample!
14
+ puts @options[:collection].call(document).first
15
+ #puts @options[:collection] ? @options[:collection].call(document).first : document.first
16
+ end
17
+
18
+ def process!
19
+ @options[:collection].call(document).each do |attrs|
20
+ @proxy.process_entry @proxy.options[:map_attributes].call( attrs )
21
+ end
22
+ end
23
+
24
+ protected
25
+
26
+ def document
27
+ MultiJson.load(@document.read)
28
+ end
4
29
  end
5
30
  end
6
31
  end
@@ -17,19 +17,21 @@ module Ingestor
17
17
  end
18
18
 
19
19
  def sample!
20
- doc = Nokogiri::XML(@document, nil, @options[:encoding])
21
- puts Hash.from_xml( doc.xpath(@options[:xpath]).first.to_s )
20
+ puts Hash.from_xml( document.xpath(@options[:xpath]).first.to_s )
22
21
  end
23
22
 
24
23
  def process!
25
- doc = Nokogiri::XML(@document, nil, @options[:encoding])
26
-
27
- doc.xpath(@options[:xpath]).each do |node|
24
+ document.xpath(@options[:xpath]).each do |node|
28
25
  node_attrs = Hash.from_xml(node.to_s)
29
26
  attrs = @proxy.options[:map_attributes].call( node_attrs )
30
27
  @proxy.process_entry attrs
31
28
  end
32
29
  end
30
+
31
+ protected
32
+ def document
33
+ Nokogiri::XML(@document, nil, @options[:encoding])
34
+ end
33
35
  end
34
36
  end
35
37
  end
@@ -1,3 +1,3 @@
1
1
  module Ingestor
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
data/samples/people.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {"people":
2
2
  [
3
3
  {
4
- "firstName": "John",
5
- "lastName": "Smith",
4
+ "first_name": "John",
5
+ "last_name": "Smith",
6
6
  "age": 25,
7
7
  "address": {
8
8
  "streetAddress": "21 2nd Street",
@@ -12,11 +12,11 @@
12
12
  }
13
13
  },
14
14
  {
15
- "firstName": "John",
16
- "lastName": "Smith",
17
- "age": 25,
15
+ "first_name": "Joanne",
16
+ "last_name": "Smith",
17
+ "age": 93,
18
18
  "address": {
19
- "streetAddress": "21 2nd Street",
19
+ "streetAddress": "Westchester Street",
20
20
  "city": "New York",
21
21
  "state": "NY",
22
22
  "postalCode": 10021
@@ -1,5 +1,25 @@
1
+ require 'spec_helper'
2
+ require 'ingestor/parser/json'
1
3
 
2
- # require 'spec_helper'
3
- # describe Ingestor::Parser::Json do
4
- # pending 'write it'
5
- # end
4
+ describe Ingestor::Parser::Json do
5
+ describe '#process!' do
6
+ before do
7
+ @proxy = ingest("./samples/people.json") do
8
+ parser :json
9
+ parser_options collection: lambda{|document|
10
+ document['people']
11
+ }
12
+ finder{|attrs| Dummy.new}
13
+ map_attributes{|values|
14
+ {
15
+ :name => [ values['first_name'], values["last_name"] ].join(' ')
16
+ }
17
+ }
18
+ end
19
+ end
20
+
21
+ it 'should be able to process a JSON file' do
22
+ Dummy.first.name.should eq "John Smith"
23
+ end
24
+ end
25
+ end
@@ -11,14 +11,12 @@ describe Ingestor::Parser::Xml do
11
11
  })
12
12
  finder{|attrs| Dummy.new}
13
13
  map_attributes{|values|
14
- puts values
15
14
  {:name => values['book']['title']}
16
15
  }
17
16
  end
18
17
  end
19
18
 
20
19
  it 'should be able to process an XML file' do
21
-
22
20
  Dummy.first.name.should eq "XML Developer's Guide"
23
21
  end
24
22
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ingestor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-02-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: docile
16
- requirement: &70343549624980 !ruby/object:Gem::Requirement
16
+ requirement: &70218534200000 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70343549624980
24
+ version_requirements: *70218534200000
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rubyzip
27
- requirement: &70343549624400 !ruby/object:Gem::Requirement
27
+ requirement: &70218534199320 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70343549624400
35
+ version_requirements: *70218534199320
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: thor
38
- requirement: &70343549623860 !ruby/object:Gem::Requirement
38
+ requirement: &70218534198520 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,21 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70343549623860
46
+ version_requirements: *70218534198520
47
+ - !ruby/object:Gem::Dependency
48
+ name: nokogiri
49
+ requirement: &70218534197600 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.5.6
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: *70218534197600
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: activesupport
49
- requirement: &70343549623160 !ruby/object:Gem::Requirement
60
+ requirement: &70218534196500 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ! '>='
@@ -54,7 +65,18 @@ dependencies:
54
65
  version: 3.2.0
55
66
  type: :runtime
56
67
  prerelease: false
57
- version_requirements: *70343549623160
68
+ version_requirements: *70218534196500
69
+ - !ruby/object:Gem::Dependency
70
+ name: multi_json
71
+ requirement: &70218534195400 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ~>
75
+ - !ruby/object:Gem::Version
76
+ version: '1.0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: *70218534195400
58
80
  description: Ingesting local and remote data files into ActiveRecord
59
81
  email:
60
82
  - github@coryodaniel.com
@@ -70,8 +92,10 @@ files:
70
92
  - README.md
71
93
  - Rakefile
72
94
  - bin/ingest
73
- - examples/text_parsing.rb
74
- - examples/xml_parsing.rb
95
+ - examples/books_xml.rb
96
+ - examples/colors_json.rb
97
+ - examples/hotel_chains_plain_text.rb
98
+ - examples/people_json.rb
75
99
  - ingestor.gemspec
76
100
  - lib/ingestor.rb
77
101
  - lib/ingestor/dsl.rb