ingestor 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -10,9 +10,7 @@ gem "guard-rspec"
10
10
  gem 'debugger', '1.2.0'
11
11
  gem 'vcr'
12
12
  gem 'fakeweb'
13
- gem 'nokogiri'
14
13
  gem 'activerecord', "~>3.2.0"
15
- gem 'activesupport', "~>3.2.0"
16
14
  gem 'mysql2'
17
15
  gem 'rb-fsevent'
18
16
  gem 'ruby_gntp'
data/README.md CHANGED
@@ -99,6 +99,33 @@ Add the following to your Rakefile
99
99
  }
100
100
  end
101
101
 
102
+ JSON Example
103
+
104
+ ingest("http://example.com/people.json") do
105
+ parser :json
106
+ parser_options collection: lambda{|document|
107
+ document['people']
108
+ }
109
+ map_attributes do |values|
110
+ {
111
+ name: values["first_name"] + " " + values["last_name"],
112
+ age: values['age'],
113
+ address: values['address']
114
+ }
115
+ end
116
+
117
+ # receives the mapped attributes for the current entry; returns the record to update
118
+ finder{|attrs|
119
+ Person.where(name: attrs[:name]).first || Person.new
120
+ }
121
+
122
+ processor{|attrs,record|
123
+ record.update_attributes(attrs)
124
+ record.send_junk_mail!
125
+ }
126
+ end
127
+
128
+
102
129
  ## Advanced Usage
103
130
  DSL Options
104
131
  * parser - the parser to use on the file
@@ -178,7 +205,13 @@ Writing parsers is simple ([see examples](https://github.com/coryodaniel/ingesto
178
205
  * Default libxml2 best guess
179
206
 
180
207
  ### JSON Parser
181
- Coming soon...
208
+ Parses a JSON document
209
+
210
+ Options
211
+ * collection - receives the document and narrows it down to the collection you are interested in
212
+ * Proc(Hash)
213
+ * Returns Hash | Array
214
+ * Required
182
215
 
183
216
  ### CSV Parser
184
217
  Coming soon...
@@ -203,9 +236,10 @@ Coming soon...
203
236
 
204
237
 
205
238
  ## Todos
206
- * rdoc lib/ folder
239
+ * rdoc http://rdoc.rubyforge.org/RDoc/Markup.html
207
240
  * Move includes_header to CSV, PlainText
208
241
  * Mongoid Support
209
242
  * sort/limit options
243
+ * configure travis
210
244
  * A way to sample a file without building an ingestor first
211
245
  * bin/ingestor --sample --path=./my.xml --parser xml --parser_options_xpath '//book'
File without changes
@@ -0,0 +1,57 @@
1
+ #! /usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'bundler/setup'
4
+ require 'ingestor'
5
+ require 'ingestor/parser/json'
6
+
7
+ # Set up a bogus active model
8
+ require 'active_model'
9
+ class Color
10
+ include ActiveModel::Naming
11
+ def persisted?
12
+ true
13
+ end
14
+ # Make a fake active model
15
+ attr_accessor :name, :hex
16
+ def update_attributes(attributes = {})
17
+ attributes.each do |name, value|
18
+ send("#{name}=", value)
19
+ end
20
+ true
21
+ end
22
+ end
23
+ # end bogusness
24
+
25
+ ingest "./samples/colors.json" do
26
+ parser :json
27
+
28
+ # Receives the full document and narrows it down to the collection to process.
29
+ parser_options collection: lambda{|document|
30
+ document
31
+ }
32
+
33
+ # How to map out the columns from text to AR
34
+ map_attributes do |values|
35
+ {
36
+ name: values['color'],
37
+ hex: values['value']
38
+ }
39
+ end
40
+
41
+ # before{|attrs| values}
42
+
43
+ # Your strategy for finding or instantiating a new object to be handled by the processor block
44
+ finder{|attrs|
45
+ # Book.find( attrs['id'] ) || Book.new
46
+ Color.new
47
+ }
48
+
49
+ processor{|attrs,record|
50
+ # ... custom processor here ...
51
+ record.update_attributes attrs
52
+ }
53
+
54
+ after{|record|
55
+ puts "Created: #{record.name}"
56
+ }
57
+ end
@@ -0,0 +1,55 @@
1
+ #! /usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'bundler/setup'
4
+ require 'ingestor'
5
+ require 'ingestor/parser/json'
6
+
7
+ # Set up a bogus active model
8
+ require 'active_model'
9
+ class Person
10
+ include ActiveModel::Naming
11
+ def persisted?
12
+ true
13
+ end
14
+ # Make a fake active model
15
+ attr_accessor :id, :first_name, :last_name, :age, :address
16
+ def update_attributes(attributes = {})
17
+ attributes.each do |name, value|
18
+ send("#{name}=", value)
19
+ end
20
+ true
21
+ end
22
+ end
23
+ # end bogusness
24
+
25
+ ingest "./samples/people.json" do
26
+ parser :json
27
+
28
+ # Receives the full document and narrows it down to the collection to process.
29
+ parser_options collection: lambda{|document|
30
+ document['people']
31
+ }
32
+ #sample true
33
+
34
+ # How to map out the columns from text to AR
35
+ map_attributes do |values|
36
+ values
37
+ end
38
+
39
+ # before{|attrs| values}
40
+
41
+ # Your strategy for finding or instantiating a new object to be handled by the processor block
42
+ finder{|attrs|
43
+ # Book.find( attrs['id'] ) || Book.new
44
+ Person.new
45
+ }
46
+
47
+ processor{|attrs,record|
48
+ # ... custom processor here ...
49
+ record.update_attributes attrs
50
+ }
51
+
52
+ after{|record|
53
+ puts "Created: #{record.first_name} @ #{record.address}"
54
+ }
55
+ end
data/ingestor.gemspec CHANGED
@@ -19,5 +19,7 @@ Gem::Specification.new do |gem|
19
19
  gem.add_dependency "docile"
20
20
  gem.add_dependency "rubyzip"
21
21
  gem.add_dependency "thor"
22
+ gem.add_dependency "nokogiri", '~> 1.5.6'
22
23
  gem.add_dependency "activesupport", '>= 3.2.0'
24
+ gem.add_dependency 'multi_json', '~> 1.0'
23
25
  end
@@ -1,6 +1,31 @@
1
+ require 'multi_json'
2
+ require 'debugger'
1
3
  module Ingestor
2
4
  module Parser
3
5
  class Json
6
+ include Ingestor::Parser::Base
7
+ def options(opts={})
8
+ @options = {
9
+ collection: nil
10
+ }.merge(opts)
11
+ end
12
+
13
+ def sample!
14
+ puts @options[:collection].call(document).first
15
+ #puts @options[:collection] ? @options[:collection].call(document).first : document.first
16
+ end
17
+
18
+ def process!
19
+ @options[:collection].call(document).each do |attrs|
20
+ @proxy.process_entry @proxy.options[:map_attributes].call( attrs )
21
+ end
22
+ end
23
+
24
+ protected
25
+
26
+ def document
27
+ MultiJson.load(@document.read)
28
+ end
4
29
  end
5
30
  end
6
31
  end
@@ -17,19 +17,21 @@ module Ingestor
17
17
  end
18
18
 
19
19
  def sample!
20
- doc = Nokogiri::XML(@document, nil, @options[:encoding])
21
- puts Hash.from_xml( doc.xpath(@options[:xpath]).first.to_s )
20
+ puts Hash.from_xml( document.xpath(@options[:xpath]).first.to_s )
22
21
  end
23
22
 
24
23
  def process!
25
- doc = Nokogiri::XML(@document, nil, @options[:encoding])
26
-
27
- doc.xpath(@options[:xpath]).each do |node|
24
+ document.xpath(@options[:xpath]).each do |node|
28
25
  node_attrs = Hash.from_xml(node.to_s)
29
26
  attrs = @proxy.options[:map_attributes].call( node_attrs )
30
27
  @proxy.process_entry attrs
31
28
  end
32
29
  end
30
+
31
+ protected
32
+ def document
33
+ Nokogiri::XML(@document, nil, @options[:encoding])
34
+ end
33
35
  end
34
36
  end
35
37
  end
@@ -1,3 +1,3 @@
1
1
  module Ingestor
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
data/samples/people.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {"people":
2
2
  [
3
3
  {
4
- "firstName": "John",
5
- "lastName": "Smith",
4
+ "first_name": "John",
5
+ "last_name": "Smith",
6
6
  "age": 25,
7
7
  "address": {
8
8
  "streetAddress": "21 2nd Street",
@@ -12,11 +12,11 @@
12
12
  }
13
13
  },
14
14
  {
15
- "firstName": "John",
16
- "lastName": "Smith",
17
- "age": 25,
15
+ "first_name": "Joanne",
16
+ "last_name": "Smith",
17
+ "age": 93,
18
18
  "address": {
19
- "streetAddress": "21 2nd Street",
19
+ "streetAddress": "Westchester Street",
20
20
  "city": "New York",
21
21
  "state": "NY",
22
22
  "postalCode": 10021
@@ -1,5 +1,25 @@
1
+ require 'spec_helper'
2
+ require 'ingestor/parser/json'
1
3
 
2
- # require 'spec_helper'
3
- # describe Ingestor::Parser::Json do
4
- # pending 'write it'
5
- # end
4
+ describe Ingestor::Parser::Json do
5
+ describe '#process!' do
6
+ before do
7
+ @proxy = ingest("./samples/people.json") do
8
+ parser :json
9
+ parser_options collection: lambda{|document|
10
+ document['people']
11
+ }
12
+ finder{|attrs| Dummy.new}
13
+ map_attributes{|values|
14
+ {
15
+ :name => [ values['first_name'], values["last_name"] ].join(' ')
16
+ }
17
+ }
18
+ end
19
+ end
20
+
21
+ it 'should be able to process a JSON file' do
22
+ Dummy.first.name.should eq "John Smith"
23
+ end
24
+ end
25
+ end
@@ -11,14 +11,12 @@ describe Ingestor::Parser::Xml do
11
11
  })
12
12
  finder{|attrs| Dummy.new}
13
13
  map_attributes{|values|
14
- puts values
15
14
  {:name => values['book']['title']}
16
15
  }
17
16
  end
18
17
  end
19
18
 
20
19
  it 'should be able to process an XML file' do
21
-
22
20
  Dummy.first.name.should eq "XML Developer's Guide"
23
21
  end
24
22
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ingestor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-02-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: docile
16
- requirement: &70343549624980 !ruby/object:Gem::Requirement
16
+ requirement: &70218534200000 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70343549624980
24
+ version_requirements: *70218534200000
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rubyzip
27
- requirement: &70343549624400 !ruby/object:Gem::Requirement
27
+ requirement: &70218534199320 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70343549624400
35
+ version_requirements: *70218534199320
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: thor
38
- requirement: &70343549623860 !ruby/object:Gem::Requirement
38
+ requirement: &70218534198520 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,21 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70343549623860
46
+ version_requirements: *70218534198520
47
+ - !ruby/object:Gem::Dependency
48
+ name: nokogiri
49
+ requirement: &70218534197600 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.5.6
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: *70218534197600
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: activesupport
49
- requirement: &70343549623160 !ruby/object:Gem::Requirement
60
+ requirement: &70218534196500 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ! '>='
@@ -54,7 +65,18 @@ dependencies:
54
65
  version: 3.2.0
55
66
  type: :runtime
56
67
  prerelease: false
57
- version_requirements: *70343549623160
68
+ version_requirements: *70218534196500
69
+ - !ruby/object:Gem::Dependency
70
+ name: multi_json
71
+ requirement: &70218534195400 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ~>
75
+ - !ruby/object:Gem::Version
76
+ version: '1.0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: *70218534195400
58
80
  description: Ingesting local and remote data files into ActiveRecord
59
81
  email:
60
82
  - github@coryodaniel.com
@@ -70,8 +92,10 @@ files:
70
92
  - README.md
71
93
  - Rakefile
72
94
  - bin/ingest
73
- - examples/text_parsing.rb
74
- - examples/xml_parsing.rb
95
+ - examples/books_xml.rb
96
+ - examples/colors_json.rb
97
+ - examples/hotel_chains_plain_text.rb
98
+ - examples/people_json.rb
75
99
  - ingestor.gemspec
76
100
  - lib/ingestor.rb
77
101
  - lib/ingestor/dsl.rb