ingestor 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -71,7 +71,8 @@ Add the following to your Rakefile
71
71
  end
72
72
 
73
73
  It can handle XML, JSON, and more...
74
-
74
+
75
+ require 'ingestor/parser/xml'
75
76
  ingest("http://example.com/books.xml") do
76
77
  parser :xml
77
78
  parser_options xpath: '//book'
@@ -99,8 +100,56 @@ Add the following to your Rakefile
99
100
  }
100
101
  end
101
102
 
103
+ CSV Example
104
+
105
+ require 'ingestor/parser/csv'
106
+ ingest "./samples/contracts.csv" do
107
+ parser :csv
108
+
109
+ # all options come directly from Ruby's standard-library CSV class
110
+ parser_options :headers => true,
111
+ :col_sep => ",",
112
+ :row_sep => :auto,
113
+ :quote_char => '"',
114
+ :field_size_limit => nil,
115
+ :converters => nil,
116
+ :unconverted_fields => nil,
117
+ :return_headers => false,
118
+ :header_converters => nil,
119
+ :skip_blanks => false,
120
+ :force_quotes => false
121
+
122
+ # How to map out the columns from text to AR
123
+ map_attributes do |row|
124
+ {
125
+ id: row[0],
126
+ seller_name: row[1],
127
+ customer_name: row[2],
128
+ commencement_date: row[7],
129
+ termination_date: row[8]
130
+ }
131
+ end
132
+
133
+ # before{|attrs| attrs}
134
+
135
+ # Your strategy for finding or instantiating a new object to be handled by the processor block
136
+ finder{|attrs|
137
+ Contract.new
138
+ }
139
+
140
+ processor{|attrs,record|
141
+ # ... custom processor here ...
142
+ record.update_attributes attrs
143
+ }
144
+
145
+ after{|record|
146
+ puts "Created: #{record.summary}"
147
+ }
148
+ end
149
+
102
150
  JSON Example
103
151
 
152
+ require 'ingestor/parser/json'
104
153
  ingest("http://example.com/people.json") do
105
154
  parser :json
106
155
  parser_options collection: lambda{|document|
@@ -236,6 +285,7 @@ Coming soon...
236
285
 
237
286
 
238
287
  ## Todos
288
+ * Deprecate plain_text (this was the first thing I created)
239
289
  * rdoc http://rdoc.rubyforge.org/RDoc/Markup.html
240
290
  * Move includes_header to CSV, PlainText
241
291
  * Mongoid Support
@@ -0,0 +1,71 @@
1
+ #! /usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'bundler/setup'
4
+ require 'ingestor'
5
+ require 'ingestor/parser/csv'
6
+
7
+ # Set up a bogus active model
8
+ require 'active_model'
9
+ class Contract
10
+ include ActiveModel::Naming
11
+ def persisted?
12
+ true
13
+ end
14
+ # Make a fake active model
15
+ attr_accessor :id, :seller_name, :customer_name, :commencement_date, :termination_date
16
+ def summary
17
+ [:seller_name, :customer_name, :commencement_date, :termination_date].map{|key|
18
+ send(key)
19
+ }.join(' ')
20
+ end
21
+ def update_attributes(attributes = {})
22
+ attributes.each do |name, value|
23
+ send("#{name}=", value)
24
+ end
25
+ true
26
+ end
27
+ end
28
+ # end bogusness
29
+
30
+ ingest "./samples/contracts.csv" do
31
+ parser :csv
32
+ #sample true
33
+ parser_options :headers => true
34
+ # :col_sep => ",",
35
+ # :row_sep => :auto,
36
+ # :quote_char => '"',
37
+ # :field_size_limit => nil,
38
+ # :converters => nil,
39
+ # :unconverted_fields => nil,
40
+ # :return_headers => false,
41
+ # :header_converters => nil,
42
+ # :skip_blanks => false,
43
+ # :force_quotes => false
44
+
45
+ # How to map out the columns from text to AR
46
+ map_attributes do |row|
47
+ {
48
+ id: row[0],
49
+ seller_name: row[1],
50
+ customer_name: row[2],
51
+ commencement_date: row[7],
52
+ termination_date: row[8]
53
+ }
54
+ end
55
+
56
+ # before{|attrs| attrs}
57
+
58
+ # Your strategy for finding or instantiating a new object to be handled by the processor block
59
+ finder{|attrs|
60
+ Contract.new
61
+ }
62
+
63
+ processor{|attrs,record|
64
+ # ... custom processor here ...
65
+ record.update_attributes attrs
66
+ }
67
+
68
+ after{|record|
69
+ puts "Created: #{record.summary}"
70
+ }
71
+ end
@@ -21,7 +21,7 @@ class HotelChain
21
21
  end
22
22
  # end bogusness
23
23
 
24
- ingest "https://www.ian.com/affiliatecenter/include/V2/ChainList.zip" do
24
+ ingest "./samples/ChainList.zip" do
25
25
  parser :plain_text
26
26
  compressed true
27
27
  includes_header true
@@ -1,6 +1,34 @@
1
+ require 'csv'
1
2
  module Ingestor
2
3
  module Parser
3
4
  class Csv
5
+ include Ingestor::Parser::Base
6
+ def options(opts={})
7
+ @options = {
8
+ :col_sep => ",",
9
+ :row_sep => :auto,
10
+ :quote_char => '"',
11
+ :field_size_limit => nil,
12
+ :converters => nil,
13
+ :unconverted_fields => nil,
14
+ :headers => false,
15
+ :return_headers => false,
16
+ :header_converters => nil,
17
+ :skip_blanks => false,
18
+ :force_quotes => false
19
+ }.merge(opts)
20
+ end
21
+
22
+ def sample!
23
+ puts CSV.parse( @document.read, @options ).first
24
+ end
25
+
26
+ def process!
27
+ CSV.parse( @document.read, @options ).each do |row|
28
+ @proxy.process_entry @proxy.options[:map_attributes].call( row )
29
+ end
30
+ end
31
+
4
32
  end
5
33
  end
6
34
  end
File without changes
@@ -1,3 +1,3 @@
1
1
  module Ingestor
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
Binary file
@@ -0,0 +1,23 @@
1
+ contract_id,seller_company_name,customer_company_name,customer_duns_number,contract_affiliate,FERC_tariff_reference,contract_service_agreement_id,contract_execution_date,contract_commencement_date,contract_termination_date,actual_termination_date,extension_provision_description,class_name,term_name,increment_name,increment_peaking_name,product_type_name,product_name,quantity,units_for_contract,rate,rate_minimum,rate_maximum,rate_description,units_for_rate,point_of_receipt_control_area,point_of_receipt_specific_location,point_of_delivery_control_area,point_of_delivery_specific_location,begin_date,end_date,time_zone
2
+ C71,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Original Volume No. 10,2,2/15/2001,2/15/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,,,,,,ES
3
+ C72,The Electric Company,Utility A,38495837,n,FERC Electric Tariff Original Volume No. 10,15,7/25/2001,8/1/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,,,,,,ES
4
+ C73,The Electric Company,Utility B,493758794,N,FERC Electric Tariff Original Volume No. 10,7,6/8/2001,7/6/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,, , ,,,ep
5
+ C74,The Electric Company,Utility C,594739573,n,FERC Electric Tariff Original Volume No. 10,25,6/8/2001,7/6/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,, , ,,,ep
6
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,ENERGY,2000,KWh,.1475, , ,Max amount of capacity and energy to be transmitted. Bill based on monthly max delivery to City.,$/KWh,PJM,Point A,PJM,Point B,,,ep
7
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,point-to-point agreement,2000,KW,0.01, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
8
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,network,2000,KW,0.2, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
9
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,BLACK START SERVICE,2000,KW,0.22, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
10
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,CAPACITY,2000,KW,0.04, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
11
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,regulation & frequency response,2000,KW,0.1, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
12
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,real power transmission loss,2000,KW,7, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
13
+ C76,The Electric Company,The Power Company,456534333,N,FERC Electric Tariff Original Volume No. 10,132,12/15/2001,1/1/2002,12/31/2004,12/31/2004,None,F,LT,M,FP,MB,CAPACITY,70,MW,3750, , ,70MW for each and every hour over the term of the agreement (7x24 schedule).,$/MW,,,,,,,ep
14
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,35, , ,,$/MWH,,,PJM,Bus 4321,20020101,20030101,EP
15
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,37, , ,,$/MWH,,,PJM,Bus 4321,20030101,20040101,EP
16
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,39, , ,,$/MWH,,,PJM,Bus 4321,20040101,20050101,EP
17
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,41, , ,,$/MWH,,,PJM,Bus 4321,20050101,20060101,EP
18
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,43, , ,,$/MWH,,,PJM,Bus 4321,20060101,20070101,EP
19
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,45, , ,,$/MWH,,,PJM,Bus 4321,20070101,20080101,EP
20
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,47, , ,,$/MWH,,,PJM,Bus 4321,20080101,20090101,EP
21
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,49, , ,,$/MWH,,,PJM,Bus 4321,20090101,20100101,EP
22
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,51, , ,,$/MWH,,,PJM,Bus 4321,20100101,20110101,EP
23
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,53, , ,,$/MWH,,,PJM,Bus 4321,20110101,20120101,EP
@@ -1,5 +1,24 @@
1
+ require 'spec_helper'
2
+ require 'ingestor/parser/csv'
1
3
 
2
- # require 'spec_helper'
3
- # describe Ingestor::Parser::Csv do
4
- # pending 'write it'
5
- # end
4
+ describe Ingestor::Parser::Csv do
5
+ describe '#process!' do
6
+ before do
7
+ @proxy = ingest("./samples/contracts.csv") do
8
+ parser :csv
9
+ parser_options headers: true
10
+
11
+ finder{|attrs| Dummy.new}
12
+ map_attributes{|row|
13
+ {
14
+ :name => row[1]
15
+ }
16
+ }
17
+ end
18
+ end
19
+
20
+ it 'should be able to process a CSV file' do
21
+ Dummy.first.name.should eq "The Electric Company"
22
+ end
23
+ end
24
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ingestor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-02-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: docile
16
- requirement: &70218534200000 !ruby/object:Gem::Requirement
16
+ requirement: &70215971566020 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70218534200000
24
+ version_requirements: *70215971566020
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rubyzip
27
- requirement: &70218534199320 !ruby/object:Gem::Requirement
27
+ requirement: &70215971565380 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70218534199320
35
+ version_requirements: *70215971565380
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: thor
38
- requirement: &70218534198520 !ruby/object:Gem::Requirement
38
+ requirement: &70215971564500 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70218534198520
46
+ version_requirements: *70215971564500
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: nokogiri
49
- requirement: &70218534197600 !ruby/object:Gem::Requirement
49
+ requirement: &70215971563640 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.5.6
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70218534197600
57
+ version_requirements: *70215971563640
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: activesupport
60
- requirement: &70218534196500 !ruby/object:Gem::Requirement
60
+ requirement: &70215971563080 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 3.2.0
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70218534196500
68
+ version_requirements: *70215971563080
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: multi_json
71
- requirement: &70218534195400 !ruby/object:Gem::Requirement
71
+ requirement: &70215971562260 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '1.0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70218534195400
79
+ version_requirements: *70215971562260
80
80
  description: Ingesting local and remote data files into ActiveRecord
81
81
  email:
82
82
  - github@coryodaniel.com
@@ -94,6 +94,7 @@ files:
94
94
  - bin/ingest
95
95
  - examples/books_xml.rb
96
96
  - examples/colors_json.rb
97
+ - examples/contracts_csv.rb
97
98
  - examples/hotel_chains_plain_text.rb
98
99
  - examples/people_json.rb
99
100
  - ingestor.gemspec
@@ -101,15 +102,18 @@ files:
101
102
  - lib/ingestor/dsl.rb
102
103
  - lib/ingestor/parser/base.rb
103
104
  - lib/ingestor/parser/csv.rb
105
+ - lib/ingestor/parser/http.rb
104
106
  - lib/ingestor/parser/json.rb
105
107
  - lib/ingestor/parser/plain_text.rb
106
108
  - lib/ingestor/parser/xml.rb
107
109
  - lib/ingestor/proxy.rb
108
110
  - lib/ingestor/tasks.rb
109
111
  - lib/ingestor/version.rb
112
+ - samples/ChainList.zip
110
113
  - samples/animals.csv
111
114
  - samples/books.xml
112
115
  - samples/colors.json
116
+ - samples/contracts.csv
113
117
  - samples/flags.txt
114
118
  - samples/people.json
115
119
  - spec/cassettes/remote-zipped-files.yml