ingestor 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +51 -1
- data/examples/contracts_csv.rb +71 -0
- data/examples/hotel_chains_plain_text.rb +1 -1
- data/lib/ingestor/parser/csv.rb +28 -0
- data/lib/ingestor/parser/http.rb +0 -0
- data/lib/ingestor/version.rb +1 -1
- data/samples/ChainList.zip +0 -0
- data/samples/contracts.csv +23 -0
- data/spec/lib/ingestor/parser/csv_spec.rb +23 -4
- metadata +17 -13
data/README.md
CHANGED
@@ -71,7 +71,8 @@ Add the following to your Rakefile
|
|
71
71
|
end
|
72
72
|
|
73
73
|
It can handle XML, JSON, and more...
|
74
|
-
|
74
|
+
|
75
|
+
require 'ingestor/parser/xml'
|
75
76
|
ingest("http://example.com/books.xml") do
|
76
77
|
parser :xml
|
77
78
|
parser_options xpath: '//book'
|
@@ -99,8 +100,56 @@ Add the following to your Rakefile
|
|
99
100
|
}
|
100
101
|
end
|
101
102
|
|
103
|
+
CSV Example
|
104
|
+
|
105
|
+
require 'ingestor/parser/csv'
|
106
|
+
ingest "./samples/contracts.csv" do
|
107
|
+
parser :csv
|
108
|
+
|
109
|
+
# all options come directly from Ruby core CSV class
|
110
|
+
parser_options :headers => true,
|
111
|
+
:col_sep => ",",
|
112
|
+
:row_sep => :auto,
|
113
|
+
:quote_char => '"',
|
114
|
+
:field_size_limit => nil,
|
115
|
+
:converters => nil,
|
116
|
+
:unconverted_fields => nil,
|
117
|
+
:return_headers => false,
|
118
|
+
:header_converters => nil,
|
119
|
+
:skip_blanks => false,
|
120
|
+
:force_quotes => false
|
121
|
+
|
122
|
+
# How to map out the columns from text to AR
|
123
|
+
map_attributes do |row|
|
124
|
+
{
|
125
|
+
id: row[0],
|
126
|
+
seller_name: row[1],
|
127
|
+
customer_name: row[2],
|
128
|
+
commencement_date: row[7],
|
129
|
+
termination_date: row[8]
|
130
|
+
}
|
131
|
+
end
|
132
|
+
|
133
|
+
# before{|attrs| attrs}
|
134
|
+
|
135
|
+
# Your strategy for finding or instantiating a new object to be handled by the processor block
|
136
|
+
finder{|attrs|
|
137
|
+
Contract.new
|
138
|
+
}
|
139
|
+
|
140
|
+
processor{|attrs,record|
|
141
|
+
# ... custom processor here ...
|
142
|
+
record.update_attributes attrs
|
143
|
+
}
|
144
|
+
|
145
|
+
after{|record|
|
146
|
+
puts "Created: #{record.summary}"
|
147
|
+
}
|
148
|
+
end
|
149
|
+
|
102
150
|
JSON Example
|
103
151
|
|
152
|
+
require 'ingestor/parser/json'
|
104
153
|
ingest("http://example.com/people.json") do
|
105
154
|
parser :json
|
106
155
|
parser_options collection: lambda{|document|
|
@@ -236,6 +285,7 @@ Coming soon...
|
|
236
285
|
|
237
286
|
|
238
287
|
## Todos
|
288
|
+
* Deprecate plain_text (this was the first thing I created)
|
239
289
|
* rdoc http://rdoc.rubyforge.org/RDoc/Markup.html
|
240
290
|
* Move includes_header to CSV, PlainText
|
241
291
|
* Mongoid Support
|
data/examples/contracts_csv.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'ingestor'
|
5
|
+
require 'ingestor/parser/csv'
|
6
|
+
|
7
|
+
# Set up a bogus active model
|
8
|
+
require 'active_model'
|
9
|
+
class Contract
|
10
|
+
include ActiveModel::Naming
|
11
|
+
def persisted?
|
12
|
+
true
|
13
|
+
end
|
14
|
+
# Make a fake active model
|
15
|
+
attr_accessor :id, :seller_name, :customer_name, :commencement_date, :termination_date
|
16
|
+
def summary
|
17
|
+
[:seller_name, :customer_name, :commencement_date, :termination_date].map{|key|
|
18
|
+
send(key)
|
19
|
+
}.join(' ')
|
20
|
+
end
|
21
|
+
def update_attributes(attributes = {})
|
22
|
+
attributes.each do |name, value|
|
23
|
+
send("#{name}=", value)
|
24
|
+
end
|
25
|
+
true
|
26
|
+
end
|
27
|
+
end
|
28
|
+
# end bogusness
|
29
|
+
|
30
|
+
ingest "./samples/contracts.csv" do
|
31
|
+
parser :csv
|
32
|
+
#sample true
|
33
|
+
parser_options :headers => true
|
34
|
+
# :col_sep => ",",
|
35
|
+
# :row_sep => :auto,
|
36
|
+
# :quote_char => '"',
|
37
|
+
# :field_size_limit => nil,
|
38
|
+
# :converters => nil,
|
39
|
+
# :unconverted_fields => nil,
|
40
|
+
# :return_headers => false,
|
41
|
+
# :header_converters => nil,
|
42
|
+
# :skip_blanks => false,
|
43
|
+
# :force_quotes => false
|
44
|
+
|
45
|
+
# How to map out the columns from text to AR
|
46
|
+
map_attributes do |row|
|
47
|
+
{
|
48
|
+
id: row[0],
|
49
|
+
seller_name: row[1],
|
50
|
+
customer_name: row[2],
|
51
|
+
commencement_date: row[7],
|
52
|
+
termination_date: row[8]
|
53
|
+
}
|
54
|
+
end
|
55
|
+
|
56
|
+
# before{|attrs| attrs}
|
57
|
+
|
58
|
+
# Your strategy for finding or instantiating a new object to be handled by the processor block
|
59
|
+
finder{|attrs|
|
60
|
+
Contract.new
|
61
|
+
}
|
62
|
+
|
63
|
+
processor{|attrs,record|
|
64
|
+
# ... custom processor here ...
|
65
|
+
record.update_attributes attrs
|
66
|
+
}
|
67
|
+
|
68
|
+
after{|record|
|
69
|
+
puts "Created: #{record.summary}"
|
70
|
+
}
|
71
|
+
end
|
data/lib/ingestor/parser/csv.rb
CHANGED
@@ -1,6 +1,34 @@
|
|
1
|
+
require 'csv'
|
1
2
|
module Ingestor
|
2
3
|
module Parser
|
3
4
|
class Csv
|
5
|
+
include Ingestor::Parser::Base
|
6
|
+
def options(opts={})
|
7
|
+
@options = {
|
8
|
+
:col_sep => ",",
|
9
|
+
:row_sep => :auto,
|
10
|
+
:quote_char => '"',
|
11
|
+
:field_size_limit => nil,
|
12
|
+
:converters => nil,
|
13
|
+
:unconverted_fields => nil,
|
14
|
+
:headers => false,
|
15
|
+
:return_headers => false,
|
16
|
+
:header_converters => nil,
|
17
|
+
:skip_blanks => false,
|
18
|
+
:force_quotes => false
|
19
|
+
}.merge(opts)
|
20
|
+
end
|
21
|
+
|
22
|
+
def sample!
|
23
|
+
puts CSV.parse( @document.read, @options ).first
|
24
|
+
end
|
25
|
+
|
26
|
+
def process!
|
27
|
+
CSV.parse( @document.read, @options ).each do |row|
|
28
|
+
@proxy.process_entry @proxy.options[:map_attributes].call( row )
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
4
32
|
end
|
5
33
|
end
|
6
34
|
end
|
File without changes
|
data/lib/ingestor/version.rb
CHANGED
Binary file
|
data/samples/contracts.csv
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
contract_id,seller_company_name,customer_company_name,customer_duns_number,contract_affiliate,FERC_tariff_reference,contract_service_agreement_id,contract_execution_date,contract_commencement_date,contract_termination_date,actual_termination_date,extension_provision_description,class_name,term_name,increment_name,increment_peaking_name,product_type_name,product_name,quantity,units_for_contract,rate,rate_minimum,rate_maximum,rate_description,units_for_rate,point_of_receipt_control_area,point_of_receipt_specific_location,point_of_delivery_control_area,point_of_delivery_specific_location,begin_date,end_date,time_zone
|
2
|
+
C71,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Original Volume No. 10,2,2/15/2001,2/15/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,,,,,,ES
|
3
|
+
C72,The Electric Company,Utility A,38495837,n,FERC Electric Tariff Original Volume No. 10,15,7/25/2001,8/1/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,,,,,,ES
|
4
|
+
C73,The Electric Company,Utility B,493758794,N,FERC Electric Tariff Original Volume No. 10,7,6/8/2001,7/6/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,, , ,,,ep
|
5
|
+
C74,The Electric Company,Utility C,594739573,n,FERC Electric Tariff Original Volume No. 10,25,6/8/2001,7/6/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,, , ,,,ep
|
6
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,ENERGY,2000,KWh,.1475, , ,Max amount of capacity and energy to be transmitted. Bill based on monthly max delivery to City.,$/KWh,PJM,Point A,PJM,Point B,,,ep
|
7
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,point-to-point agreement,2000,KW,0.01, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
|
8
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,network,2000,KW,0.2, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
|
9
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,BLACK START SERVICE,2000,KW,0.22, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
|
10
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,CAPACITY,2000,KW,0.04, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
|
11
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,regulation & frequency response,2000,KW,0.1, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
|
12
|
+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,real power transmission loss,2000,KW,7, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
|
13
|
+
C76,The Electric Company,The Power Company,456534333,N,FERC Electric Tariff Original Volume No. 10,132,12/15/2001,1/1/2002,12/31/2004,12/31/2004,None,F,LT,M,FP,MB,CAPACITY,70,MW,3750, , ,70MW for each and every hour over the term of the agreement (7x24 schedule).,$/MW,,,,,,,ep
|
14
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,35, , ,,$/MWH,,,PJM,Bus 4321,20020101,20030101,EP
|
15
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,37, , ,,$/MWH,,,PJM,Bus 4321,20030101,20040101,EP
|
16
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,39, , ,,$/MWH,,,PJM,Bus 4321,20040101,20050101,EP
|
17
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,41, , ,,$/MWH,,,PJM,Bus 4321,20050101,20060101,EP
|
18
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,43, , ,,$/MWH,,,PJM,Bus 4321,20060101,20070101,EP
|
19
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,45, , ,,$/MWH,,,PJM,Bus 4321,20070101,20080101,EP
|
20
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,47, , ,,$/MWH,,,PJM,Bus 4321,20080101,20090101,EP
|
21
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,49, , ,,$/MWH,,,PJM,Bus 4321,20090101,20100101,EP
|
22
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,51, , ,,$/MWH,,,PJM,Bus 4321,20100101,20110101,EP
|
23
|
+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,53, , ,,$/MWH,,,PJM,Bus 4321,20110101,20120101,EP
|
data/spec/lib/ingestor/parser/csv_spec.rb
CHANGED
@@ -1,5 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ingestor/parser/csv'
|
1
3
|
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
4
|
+
describe Ingestor::Parser::Csv do
|
5
|
+
describe '#process!' do
|
6
|
+
before do
|
7
|
+
@proxy = ingest("./samples/contracts.csv") do
|
8
|
+
parser :csv
|
9
|
+
parser_options headers: true
|
10
|
+
|
11
|
+
finder{|attrs| Dummy.new}
|
12
|
+
map_attributes{|row|
|
13
|
+
{
|
14
|
+
:name => row[1]
|
15
|
+
}
|
16
|
+
}
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should be able to process a JSON file' do
|
21
|
+
Dummy.first.name.should eq "The Electric Company"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ingestor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-02-22 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: docile
|
16
|
-
requirement: &
|
16
|
+
requirement: &70215971566020 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70215971566020
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rubyzip
|
27
|
-
requirement: &
|
27
|
+
requirement: &70215971565380 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70215971565380
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: thor
|
38
|
-
requirement: &
|
38
|
+
requirement: &70215971564500 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70215971564500
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: nokogiri
|
49
|
-
requirement: &
|
49
|
+
requirement: &70215971563640 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.5.6
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70215971563640
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: activesupport
|
60
|
-
requirement: &
|
60
|
+
requirement: &70215971563080 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 3.2.0
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70215971563080
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: multi_json
|
71
|
-
requirement: &
|
71
|
+
requirement: &70215971562260 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: '1.0'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70215971562260
|
80
80
|
description: Ingesting local and remote data files into ActiveRecord
|
81
81
|
email:
|
82
82
|
- github@coryodaniel.com
|
@@ -94,6 +94,7 @@ files:
|
|
94
94
|
- bin/ingest
|
95
95
|
- examples/books_xml.rb
|
96
96
|
- examples/colors_json.rb
|
97
|
+
- examples/contracts_csv.rb
|
97
98
|
- examples/hotel_chains_plain_text.rb
|
98
99
|
- examples/people_json.rb
|
99
100
|
- ingestor.gemspec
|
@@ -101,15 +102,18 @@ files:
|
|
101
102
|
- lib/ingestor/dsl.rb
|
102
103
|
- lib/ingestor/parser/base.rb
|
103
104
|
- lib/ingestor/parser/csv.rb
|
105
|
+
- lib/ingestor/parser/http.rb
|
104
106
|
- lib/ingestor/parser/json.rb
|
105
107
|
- lib/ingestor/parser/plain_text.rb
|
106
108
|
- lib/ingestor/parser/xml.rb
|
107
109
|
- lib/ingestor/proxy.rb
|
108
110
|
- lib/ingestor/tasks.rb
|
109
111
|
- lib/ingestor/version.rb
|
112
|
+
- samples/ChainList.zip
|
110
113
|
- samples/animals.csv
|
111
114
|
- samples/books.xml
|
112
115
|
- samples/colors.json
|
116
|
+
- samples/contracts.csv
|
113
117
|
- samples/flags.txt
|
114
118
|
- samples/people.json
|
115
119
|
- spec/cassettes/remote-zipped-files.yml
|