ingestor 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -71,7 +71,8 @@ Add the following to your Rakefile
71
71
  end
72
72
 
73
73
  It can handle XML, JSON, and more...
74
-
74
+
75
+ require 'ingestor/parser/xml'
75
76
  ingest("http://example.com/books.xml") do
76
77
  parser :xml
77
78
  parser_options xpath: '//book'
@@ -99,8 +100,56 @@ Add the following to your Rakefile
99
100
  }
100
101
  end
101
102
 
103
+ CSV Example
104
+
105
+ require 'ingestor/parser/csv'
106
+ ingest "./samples/contracts.csv" do
107
+ parser :csv
108
+
109
+ # all options come directly from Ruby core CSV class
110
+ parser_options :headers => true,
111
+ :col_sep => ",",
112
+ :row_sep => :auto,
113
+ :quote_char => '"',
114
+ :field_size_limit => nil,
115
+ :converters => nil,
116
+ :unconverted_fields => nil,
117
+ :return_headers => false,
118
+ :header_converters => nil,
119
+ :skip_blanks => false,
120
+ :force_quotes => false
121
+
122
+ # How to map out the columns from text to AR
123
+ map_attributes do |row|
124
+ {
125
+ id: row[0],
126
+ seller_name: row[1],
127
+ customer_name: row[2],
128
+ commencement_date: row[7],
129
+ termination_date: row[8]
130
+ }
131
+ end
132
+
133
+ # before{|attrs| attrs}
134
+
135
+ # Your strategy for finding or instantiating a new object to be handled by the processor block
136
+ finder{|attrs|
137
+ Contract.new
138
+ }
139
+
140
+ processor{|attrs,record|
141
+ # ... custom processor here ...
142
+ record.update_attributes attrs
143
+ }
144
+
145
+ after{|record|
146
+ puts "Created: #{record.summary}"
147
+ }
148
+ end
149
+
102
150
  JSON Example
103
151
 
152
+ require 'ingestor/parser/json'
104
153
  ingest("http://example.com/people.json") do
105
154
  parser :json
106
155
  parser_options collection: lambda{|document|
@@ -236,6 +285,7 @@ Coming soon...
236
285
 
237
286
 
238
287
  ## Todos
288
+ * Deprecate plain_text (this was the first thing I created)
239
289
  * rdoc http://rdoc.rubyforge.org/RDoc/Markup.html
240
290
  * Move includes_header to CSV, PlainText
241
291
  * Mongoid Support
@@ -0,0 +1,71 @@
1
+ #! /usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'bundler/setup'
4
+ require 'ingestor'
5
+ require 'ingestor/parser/csv'
6
+
7
+ # Set up a bogus active model
8
+ require 'active_model'
9
+ class Contract
10
+ include ActiveModel::Naming
11
+ def persisted?
12
+ true
13
+ end
14
+ # Make a fake active model
15
+ attr_accessor :id, :seller_name, :customer_name, :commencement_date, :termination_date
16
+ def summary
17
+ [:seller_name, :customer_name, :commencement_date, :termination_date].map{|key|
18
+ send(key)
19
+ }.join(' ')
20
+ end
21
+ def update_attributes(attributes = {})
22
+ attributes.each do |name, value|
23
+ send("#{name}=", value)
24
+ end
25
+ true
26
+ end
27
+ end
28
+ # end bogusness
29
+
30
+ ingest "./samples/contracts.csv" do
31
+ parser :csv
32
+ #sample true
33
+ parser_options :headers => true
34
+ # :col_sep => ",",
35
+ # :row_sep => :auto,
36
+ # :quote_char => '"',
37
+ # :field_size_limit => nil,
38
+ # :converters => nil,
39
+ # :unconverted_fields => nil,
40
+ # :return_headers => false,
41
+ # :header_converters => nil,
42
+ # :skip_blanks => false,
43
+ # :force_quotes => false
44
+
45
+ # How to map out the columns from text to AR
46
+ map_attributes do |row|
47
+ {
48
+ id: row[0],
49
+ seller_name: row[1],
50
+ customer_name: row[2],
51
+ commencement_date: row[7],
52
+ termination_date: row[8]
53
+ }
54
+ end
55
+
56
+ # before{|attrs| attrs}
57
+
58
+ # Your strategy for finding or instantiating a new object to be handled by the processor block
59
+ finder{|attrs|
60
+ Contract.new
61
+ }
62
+
63
+ processor{|attrs,record|
64
+ # ... custom processor here ...
65
+ record.update_attributes attrs
66
+ }
67
+
68
+ after{|record|
69
+ puts "Created: #{record.summary}"
70
+ }
71
+ end
@@ -21,7 +21,7 @@ class HotelChain
21
21
  end
22
22
  # end bogusness
23
23
 
24
- ingest "https://www.ian.com/affiliatecenter/include/V2/ChainList.zip" do
24
+ ingest "./samples/ChainList.zip" do
25
25
  parser :plain_text
26
26
  compressed true
27
27
  includes_header true
@@ -1,6 +1,34 @@
1
+ require 'csv'
1
2
  module Ingestor
2
3
  module Parser
3
4
  class Csv
5
+ include Ingestor::Parser::Base
6
+ def options(opts={})
7
+ @options = {
8
+ :col_sep => ",",
9
+ :row_sep => :auto,
10
+ :quote_char => '"',
11
+ :field_size_limit => nil,
12
+ :converters => nil,
13
+ :unconverted_fields => nil,
14
+ :headers => false,
15
+ :return_headers => false,
16
+ :header_converters => nil,
17
+ :skip_blanks => false,
18
+ :force_quotes => false
19
+ }.merge(opts)
20
+ end
21
+
22
+ def sample!
23
+ puts CSV.parse( @document.read, @options ).first
24
+ end
25
+
26
+ def process!
27
+ CSV.parse( @document.read, @options ).each do |row|
28
+ @proxy.process_entry @proxy.options[:map_attributes].call( row )
29
+ end
30
+ end
31
+
4
32
  end
5
33
  end
6
34
  end
File without changes
@@ -1,3 +1,3 @@
1
1
  module Ingestor
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
Binary file
@@ -0,0 +1,23 @@
1
+ contract_id,seller_company_name,customer_company_name,customer_duns_number,contract_affiliate,FERC_tariff_reference,contract_service_agreement_id,contract_execution_date,contract_commencement_date,contract_termination_date,actual_termination_date,extension_provision_description,class_name,term_name,increment_name,increment_peaking_name,product_type_name,product_name,quantity,units_for_contract,rate,rate_minimum,rate_maximum,rate_description,units_for_rate,point_of_receipt_control_area,point_of_receipt_specific_location,point_of_delivery_control_area,point_of_delivery_specific_location,begin_date,end_date,time_zone
2
+ C71,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Original Volume No. 10,2,2/15/2001,2/15/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,,,,,,ES
3
+ C72,The Electric Company,Utility A,38495837,n,FERC Electric Tariff Original Volume No. 10,15,7/25/2001,8/1/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,,,,,,ES
4
+ C73,The Electric Company,Utility B,493758794,N,FERC Electric Tariff Original Volume No. 10,7,6/8/2001,7/6/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,, , ,,,ep
5
+ C74,The Electric Company,Utility C,594739573,n,FERC Electric Tariff Original Volume No. 10,25,6/8/2001,7/6/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,, , ,,,ep
6
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,ENERGY,2000,KWh,.1475, , ,Max amount of capacity and energy to be transmitted. Bill based on monthly max delivery to City.,$/KWh,PJM,Point A,PJM,Point B,,,ep
7
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,point-to-point agreement,2000,KW,0.01, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
8
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,network,2000,KW,0.2, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
9
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,BLACK START SERVICE,2000,KW,0.22, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
10
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,CAPACITY,2000,KW,0.04, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
11
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,regulation & frequency response,2000,KW,0.1, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
12
+ C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,real power transmission loss,2000,KW,7, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
13
+ C76,The Electric Company,The Power Company,456534333,N,FERC Electric Tariff Original Volume No. 10,132,12/15/2001,1/1/2002,12/31/2004,12/31/2004,None,F,LT,M,FP,MB,CAPACITY,70,MW,3750, , ,70MW for each and every hour over the term of the agreement (7x24 schedule).,$/MW,,,,,,,ep
14
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,35, , ,,$/MWH,,,PJM,Bus 4321,20020101,20030101,EP
15
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,37, , ,,$/MWH,,,PJM,Bus 4321,20030101,20040101,EP
16
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,39, , ,,$/MWH,,,PJM,Bus 4321,20040101,20050101,EP
17
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,41, , ,,$/MWH,,,PJM,Bus 4321,20050101,20060101,EP
18
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,43, , ,,$/MWH,,,PJM,Bus 4321,20060101,20070101,EP
19
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,45, , ,,$/MWH,,,PJM,Bus 4321,20070101,20080101,EP
20
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,47, , ,,$/MWH,,,PJM,Bus 4321,20080101,20090101,EP
21
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,49, , ,,$/MWH,,,PJM,Bus 4321,20090101,20100101,EP
22
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,51, , ,,$/MWH,,,PJM,Bus 4321,20100101,20110101,EP
23
+ C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,53, , ,,$/MWH,,,PJM,Bus 4321,20110101,20120101,EP
@@ -1,5 +1,24 @@
1
+ require 'spec_helper'
2
+ require 'ingestor/parser/csv'
1
3
 
2
- # require 'spec_helper'
3
- # describe Ingestor::Parser::Csv do
4
- # pending 'write it'
5
- # end
4
+ describe Ingestor::Parser::Csv do
5
+ describe '#process!' do
6
+ before do
7
+ @proxy = ingest("./samples/contracts.csv") do
8
+ parser :csv
9
+ parser_options headers: true
10
+
11
+ finder{|attrs| Dummy.new}
12
+ map_attributes{|row|
13
+ {
14
+ :name => row[1]
15
+ }
16
+ }
17
+ end
18
+ end
19
+
20
+ it 'should be able to process a JSON file' do
21
+ Dummy.first.name.should eq "The Electric Company"
22
+ end
23
+ end
24
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ingestor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-02-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: docile
16
- requirement: &70218534200000 !ruby/object:Gem::Requirement
16
+ requirement: &70215971566020 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70218534200000
24
+ version_requirements: *70215971566020
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rubyzip
27
- requirement: &70218534199320 !ruby/object:Gem::Requirement
27
+ requirement: &70215971565380 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70218534199320
35
+ version_requirements: *70215971565380
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: thor
38
- requirement: &70218534198520 !ruby/object:Gem::Requirement
38
+ requirement: &70215971564500 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70218534198520
46
+ version_requirements: *70215971564500
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: nokogiri
49
- requirement: &70218534197600 !ruby/object:Gem::Requirement
49
+ requirement: &70215971563640 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.5.6
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70218534197600
57
+ version_requirements: *70215971563640
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: activesupport
60
- requirement: &70218534196500 !ruby/object:Gem::Requirement
60
+ requirement: &70215971563080 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 3.2.0
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70218534196500
68
+ version_requirements: *70215971563080
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: multi_json
71
- requirement: &70218534195400 !ruby/object:Gem::Requirement
71
+ requirement: &70215971562260 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '1.0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70218534195400
79
+ version_requirements: *70215971562260
80
80
  description: Ingesting local and remote data files into ActiveRecord
81
81
  email:
82
82
  - github@coryodaniel.com
@@ -94,6 +94,7 @@ files:
94
94
  - bin/ingest
95
95
  - examples/books_xml.rb
96
96
  - examples/colors_json.rb
97
+ - examples/contracts_csv.rb
97
98
  - examples/hotel_chains_plain_text.rb
98
99
  - examples/people_json.rb
99
100
  - ingestor.gemspec
@@ -101,15 +102,18 @@ files:
101
102
  - lib/ingestor/dsl.rb
102
103
  - lib/ingestor/parser/base.rb
103
104
  - lib/ingestor/parser/csv.rb
105
+ - lib/ingestor/parser/http.rb
104
106
  - lib/ingestor/parser/json.rb
105
107
  - lib/ingestor/parser/plain_text.rb
106
108
  - lib/ingestor/parser/xml.rb
107
109
  - lib/ingestor/proxy.rb
108
110
  - lib/ingestor/tasks.rb
109
111
  - lib/ingestor/version.rb
112
+ - samples/ChainList.zip
110
113
  - samples/animals.csv
111
114
  - samples/books.xml
112
115
  - samples/colors.json
116
+ - samples/contracts.csv
113
117
  - samples/flags.txt
114
118
  - samples/people.json
115
119
  - spec/cassettes/remote-zipped-files.yml