ingestor 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +51 -1
- data/examples/contracts_csv.rb +71 -0
- data/examples/hotel_chains_plain_text.rb +1 -1
- data/lib/ingestor/parser/csv.rb +28 -0
- data/lib/ingestor/parser/http.rb +0 -0
- data/lib/ingestor/version.rb +1 -1
- data/samples/ChainList.zip +0 -0
- data/samples/contracts.csv +23 -0
- data/spec/lib/ingestor/parser/csv_spec.rb +23 -4
- metadata +17 -13
data/README.md
CHANGED
@@ -71,7 +71,8 @@ Add the following to your Rakefile
|
|
71
71
|
end
|
72
72
|
|
73
73
|
It can handle XML, JSON, and more...
|
74
|
-
|
74
|
+
|
75
|
+
require 'ingestor/parser/xml'
|
75
76
|
ingest("http://example.com/books.xml") do
|
76
77
|
parser :xml
|
77
78
|
parser_options xpath: '//book'
|
@@ -99,8 +100,56 @@ Add the following to your Rakefile
|
|
99
100
|
}
|
100
101
|
end
|
101
102
|
|
103
|
+
CSV Example
|
104
|
+
|
105
|
+
require 'ingestor/parser/csv'
|
106
|
+
ingest "./samples/contracts.csv" do
|
107
|
+
parser :csv
|
108
|
+
|
109
|
+
# all options are passed straight through to Ruby's standard-library CSV class
|
110
|
+
parser_options :headers => true,
|
111
|
+
:col_sep => ",",
|
112
|
+
:row_sep => :auto,
|
113
|
+
:quote_char => '"',
|
114
|
+
:field_size_limit => nil,
|
115
|
+
:converters => nil,
|
116
|
+
:unconverted_fields => nil,
|
117
|
+
:return_headers => false,
|
118
|
+
:header_converters => nil,
|
119
|
+
:skip_blanks => false,
|
120
|
+
:force_quotes => false
|
121
|
+
|
122
|
+
# How to map out the columns from text to AR
|
123
|
+
map_attributes do |row|
|
124
|
+
{
|
125
|
+
id: row[0],
|
126
|
+
seller_name: row[1],
|
127
|
+
customer_name: row[2],
|
128
|
+
commencement_date: row[7],
|
129
|
+
termination_date: row[8]
|
130
|
+
}
|
131
|
+
end
|
132
|
+
|
133
|
+
# before{|attrs| attrs}
|
134
|
+
|
135
|
+
# Your strategy for finding or instantiating a new object to be handled by the processor block
|
136
|
+
finder{|attrs|
|
137
|
+
Contract.new
|
138
|
+
}
|
139
|
+
|
140
|
+
processor{|attrs,record|
|
141
|
+
# ... custom processor here ...
|
142
|
+
record.update_attributes attrs
|
143
|
+
}
|
144
|
+
|
145
|
+
after{|record|
|
146
|
+
puts "Created: #{record.summary}"
|
147
|
+
}
|
148
|
+
end
|
149
|
+
|
102
150
|
JSON Example
|
103
151
|
|
152
|
+
require 'ingestor/parser/json'
|
104
153
|
ingest("http://example.com/people.json") do
|
105
154
|
parser :json
|
106
155
|
parser_options collection: lambda{|document|
|
@@ -236,6 +285,7 @@ Coming soon...
|
|
236
285
|
|
237
286
|
|
238
287
|
## Todos
|
288
|
+
* Deprecate plain_text (this was the first thing I created)
|
239
289
|
* rdoc http://rdoc.rubyforge.org/RDoc/Markup.html
|
240
290
|
* Move includes_header to CSV, PlainText
|
241
291
|
* Mongoid Support
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'ingestor'
|
5
|
+
require 'ingestor/parser/csv'
|
6
|
+
|
7
|
+
# Set up a bogus active model
|
8
|
+
require 'active_model'
|
9
|
+
class Contract
|
10
|
+
include ActiveModel::Naming
|
11
|
+
def persisted?
|
12
|
+
true
|
13
|
+
end
|
14
|
+
# Make a fake active model
|
15
|
+
attr_accessor :id, :seller_name, :customer_name, :commencement_date, :termination_date
|
16
|
+
def summary
|
17
|
+
[:seller_name, :customer_name, :commencement_date, :termination_date].map{|key|
|
18
|
+
send(key)
|
19
|
+
}.join(' ')
|
20
|
+
end
|
21
|
+
def update_attributes(attributes = {})
|
22
|
+
attributes.each do |name, value|
|
23
|
+
send("#{name}=", value)
|
24
|
+
end
|
25
|
+
true
|
26
|
+
end
|
27
|
+
end
|
28
|
+
# end bogusness
|
29
|
+
|
30
|
+
ingest "./samples/contracts.csv" do
|
31
|
+
parser :csv
|
32
|
+
#sample true
|
33
|
+
parser_options :headers => true
|
34
|
+
# :col_sep => ",",
|
35
|
+
# :row_sep => :auto,
|
36
|
+
# :quote_char => '"',
|
37
|
+
# :field_size_limit => nil,
|
38
|
+
# :converters => nil,
|
39
|
+
# :unconverted_fields => nil,
|
40
|
+
# :return_headers => false,
|
41
|
+
# :header_converters => nil,
|
42
|
+
# :skip_blanks => false,
|
43
|
+
# :force_quotes => false
|
44
|
+
|
45
|
+
# How to map out the columns from text to AR
|
46
|
+
map_attributes do |row|
|
47
|
+
{
|
48
|
+
id: row[0],
|
49
|
+
seller_name: row[1],
|
50
|
+
customer_name: row[2],
|
51
|
+
commencement_date: row[7],
|
52
|
+
termination_date: row[8]
|
53
|
+
}
|
54
|
+
end
|
55
|
+
|
56
|
+
# before{|attrs| attrs}
|
57
|
+
|
58
|
+
# Your strategy for finding or instantiating a new object to be handled by the processor block
|
59
|
+
finder{|attrs|
|
60
|
+
Contract.new
|
61
|
+
}
|
62
|
+
|
63
|
+
processor{|attrs,record|
|
64
|
+
# ... custom processor here ...
|
65
|
+
record.update_attributes attrs
|
66
|
+
}
|
67
|
+
|
68
|
+
after{|record|
|
69
|
+
puts "Created: #{record.summary}"
|
70
|
+
}
|
71
|
+
end
|
data/lib/ingestor/parser/csv.rb
CHANGED
@@ -1,6 +1,34 @@
|
|
1
|
+
require 'csv'
|
1
2
|
module Ingestor
|
2
3
|
module Parser
|
3
4
|
class Csv
|
5
|
+
include Ingestor::Parser::Base
|
6
|
+
def options(opts={})
|
7
|
+
@options = {
|
8
|
+
:col_sep => ",",
|
9
|
+
:row_sep => :auto,
|
10
|
+
:quote_char => '"',
|
11
|
+
:field_size_limit => nil,
|
12
|
+
:converters => nil,
|
13
|
+
:unconverted_fields => nil,
|
14
|
+
:headers => false,
|
15
|
+
:return_headers => false,
|
16
|
+
:header_converters => nil,
|
17
|
+
:skip_blanks => false,
|
18
|
+
:force_quotes => false
|
19
|
+
}.merge(opts)
|
20
|
+
end
|
21
|
+
|
22
|
+
def sample!
|
23
|
+
puts CSV.parse( @document.read, @options ).first
|
24
|
+
end
|
25
|
+
|
26
|
+
def process!
|
27
|
+
CSV.parse( @document.read, @options ).each do |row|
|
28
|
+
@proxy.process_entry @proxy.options[:map_attributes].call( row )
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
4
32
|
end
|
5
33
|
end
|
6
34
|
end
|
File without changes
|
data/lib/ingestor/version.rb
CHANGED
Binary file
|
@@ -0,0 +1,23 @@
|
|
1
|
+
contract_id,seller_company_name,customer_company_name,customer_duns_number,contract_affiliate,FERC_tariff_reference,contract_service_agreement_id,contract_execution_date,contract_commencement_date,contract_termination_date,actual_termination_date,extension_provision_description,class_name,term_name,increment_name,increment_peaking_name,product_type_name,product_name,quantity,units_for_contract,rate,rate_minimum,rate_maximum,rate_description,units_for_rate,point_of_receipt_control_area,point_of_receipt_specific_location,point_of_delivery_control_area,point_of_delivery_specific_location,begin_date,end_date,time_zone
|
2
|
+
C71,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Original Volume No. 10,2,2/15/2001,2/15/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,,,,,,ES
|
3
|
+
C72,The Electric Company,Utility A,38495837,n,FERC Electric Tariff Original Volume No. 10,15,7/25/2001,8/1/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,,,,,,ES
|
4
|
+
C73,The Electric Company,Utility B,493758794,N,FERC Electric Tariff Original Volume No. 10,7,6/8/2001,7/6/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,, , ,,,ep
|
5
|
+
C74,The Electric Company,Utility C,594739573,n,FERC Electric Tariff Original Volume No. 10,25,6/8/2001,7/6/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,, , ,,,ep
|
6
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,ENERGY,2000,KWh,.1475, , ,Max amount of capacity and energy to be transmitted. Bill based on monthly max delivery to City.,$/KWh,PJM,Point A,PJM,Point B,,,ep
|
7
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,point-to-point agreement,2000,KW,0.01, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
|
8
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,network,2000,KW,0.2, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
|
9
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,BLACK START SERVICE,2000,KW,0.22, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
|
10
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,CAPACITY,2000,KW,0.04, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
|
11
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,regulation & frequency response,2000,KW,0.1, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
|
12
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,real power transmission loss,2000,KW,7, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
|
13
|
+
C76,The Electric Company,The Power Company,456534333,N,FERC Electric Tariff Original Volume No. 10,132,12/15/2001,1/1/2002,12/31/2004,12/31/2004,None,F,LT,M,FP,MB,CAPACITY,70,MW,3750, , ,70MW for each and every hour over the term of the agreement (7x24 schedule).,$/MW,,,,,,,ep
|
14
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,35, , ,,$/MWH,,,PJM,Bus 4321,20020101,20030101,EP
|
15
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,37, , ,,$/MWH,,,PJM,Bus 4321,20030101,20040101,EP
|
16
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,39, , ,,$/MWH,,,PJM,Bus 4321,20040101,20050101,EP
|
17
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,41, , ,,$/MWH,,,PJM,Bus 4321,20050101,20060101,EP
|
18
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,43, , ,,$/MWH,,,PJM,Bus 4321,20060101,20070101,EP
|
19
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,45, , ,,$/MWH,,,PJM,Bus 4321,20070101,20080101,EP
|
20
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,47, , ,,$/MWH,,,PJM,Bus 4321,20080101,20090101,EP
|
21
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,49, , ,,$/MWH,,,PJM,Bus 4321,20090101,20100101,EP
|
22
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,51, , ,,$/MWH,,,PJM,Bus 4321,20100101,20110101,EP
|
23
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,53, , ,,$/MWH,,,PJM,Bus 4321,20110101,20120101,EP
|
@@ -1,5 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ingestor/parser/csv'
|
1
3
|
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
4
|
+
describe Ingestor::Parser::Csv do
|
5
|
+
describe '#process!' do
|
6
|
+
before do
|
7
|
+
@proxy = ingest("./samples/contracts.csv") do
|
8
|
+
parser :csv
|
9
|
+
parser_options headers: true
|
10
|
+
|
11
|
+
finder{|attrs| Dummy.new}
|
12
|
+
map_attributes{|row|
|
13
|
+
{
|
14
|
+
:name => row[1]
|
15
|
+
}
|
16
|
+
}
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should be able to process a JSON file' do
|
21
|
+
Dummy.first.name.should eq "The Electric Company"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ingestor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-02-22 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: docile
|
16
|
-
requirement: &
|
16
|
+
requirement: &70215971566020 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70215971566020
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rubyzip
|
27
|
-
requirement: &
|
27
|
+
requirement: &70215971565380 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70215971565380
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: thor
|
38
|
-
requirement: &
|
38
|
+
requirement: &70215971564500 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70215971564500
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: nokogiri
|
49
|
-
requirement: &
|
49
|
+
requirement: &70215971563640 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.5.6
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70215971563640
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: activesupport
|
60
|
-
requirement: &
|
60
|
+
requirement: &70215971563080 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 3.2.0
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70215971563080
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: multi_json
|
71
|
-
requirement: &
|
71
|
+
requirement: &70215971562260 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: '1.0'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70215971562260
|
80
80
|
description: Ingesting local and remote data files into ActiveRecord
|
81
81
|
email:
|
82
82
|
- github@coryodaniel.com
|
@@ -94,6 +94,7 @@ files:
|
|
94
94
|
- bin/ingest
|
95
95
|
- examples/books_xml.rb
|
96
96
|
- examples/colors_json.rb
|
97
|
+
- examples/contracts_csv.rb
|
97
98
|
- examples/hotel_chains_plain_text.rb
|
98
99
|
- examples/people_json.rb
|
99
100
|
- ingestor.gemspec
|
@@ -101,15 +102,18 @@ files:
|
|
101
102
|
- lib/ingestor/dsl.rb
|
102
103
|
- lib/ingestor/parser/base.rb
|
103
104
|
- lib/ingestor/parser/csv.rb
|
105
|
+
- lib/ingestor/parser/http.rb
|
104
106
|
- lib/ingestor/parser/json.rb
|
105
107
|
- lib/ingestor/parser/plain_text.rb
|
106
108
|
- lib/ingestor/parser/xml.rb
|
107
109
|
- lib/ingestor/proxy.rb
|
108
110
|
- lib/ingestor/tasks.rb
|
109
111
|
- lib/ingestor/version.rb
|
112
|
+
- samples/ChainList.zip
|
110
113
|
- samples/animals.csv
|
111
114
|
- samples/books.xml
|
112
115
|
- samples/colors.json
|
116
|
+
- samples/contracts.csv
|
113
117
|
- samples/flags.txt
|
114
118
|
- samples/people.json
|
115
119
|
- spec/cassettes/remote-zipped-files.yml
|