ingestor 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +21 -0
- data/Gemfile +18 -0
- data/Guardfile +11 -0
- data/LICENSE.txt +22 -0
- data/README.md +211 -0
- data/Rakefile +7 -0
- data/bin/ingest +73 -0
- data/examples/text_parsing.rb +56 -0
- data/examples/xml_parsing.rb +52 -0
- data/ingestor.gemspec +23 -0
- data/lib/ingestor.rb +37 -0
- data/lib/ingestor/dsl.rb +110 -0
- data/lib/ingestor/parser/base.rb +28 -0
- data/lib/ingestor/parser/csv.rb +8 -0
- data/lib/ingestor/parser/json.rb +8 -0
- data/lib/ingestor/parser/plain_text.rb +44 -0
- data/lib/ingestor/parser/xml.rb +37 -0
- data/lib/ingestor/proxy.rb +113 -0
- data/lib/ingestor/tasks.rb +15 -0
- data/lib/ingestor/version.rb +3 -0
- data/samples/animals.csv +7 -0
- data/samples/books.xml +32 -0
- data/samples/colors.json +30 -0
- data/samples/flags.txt +12 -0
- data/samples/people.json +26 -0
- data/spec/cassettes/remote-zipped-files.yml +186 -0
- data/spec/lib/ingestor/dsl_spec.rb +114 -0
- data/spec/lib/ingestor/parser/csv_spec.rb +5 -0
- data/spec/lib/ingestor/parser/json_spec.rb +5 -0
- data/spec/lib/ingestor/parser/plain_text_spec.rb +24 -0
- data/spec/lib/ingestor/parser/xml_spec.rb +25 -0
- data/spec/lib/ingestor/proxy_spec.rb +129 -0
- data/spec/lib/ingestor_spec.rb +19 -0
- data/spec/orm/active_record.rb +33 -0
- data/spec/orm/database.example.yml +15 -0
- data/spec/spec_helper.rb +21 -0
- metadata +139 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ingestor/parser/xml'
|
3
|
+
|
4
|
+
# Specs for the XML parser, driven through the top-level `ingest` DSL.
describe Ingestor::Parser::Xml do
  describe '#process!' do
    before do
      # Ingest the sample catalogue, selecting one node per <book> element.
      # NOTE(review): `ingest` presumably runs the import immediately, since the
      # example below asserts on stored Dummy rows without any further call.
      @proxy = ingest("./samples/books.xml") do
        parser :xml
        parser_options({
          xpath: '//book'
        })
        # Every node becomes a brand-new Dummy record.
        finder { |attrs| Dummy.new }
        # Only the book title is mapped, onto Dummy#name.
        # (A leftover `puts values` debug print was removed here; it wrote every
        # parsed node to stdout on each spec run.)
        map_attributes { |values|
          {:name => values['book']['title']}
        }
      end
    end

    it 'should be able to process an XML file' do
      Dummy.first.name.should eq "XML Developer's Guide"
    end
  end
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Shared helper: calls `ingest` on ./samples/flags.txt with header handling
# switched on, a finder that always builds a new Country, and an attribute
# map covering the first three columns (name, colors, count).
#
# Returns whatever `ingest` returns.
def default_test_ingestor
  ingest("./samples/flags.txt") do
    includes_header true
    finder { |_values| Country.new }
    map_attributes do |values|
      {
        :name   => values[0],
        :colors => values[1],
        :count  => values[2]
      }
    end
  end
end
|
10
|
+
|
11
|
+
# Specs for Ingestor::Proxy: source detection (local/remote/compressed),
# header handling, and the #before / #after / #processor hooks.
describe Ingestor::Proxy do
  describe 'loading local files' do
    before :each do
      @proxy = ingest("./samples/flags.txt") do
        finder{|values| Country.new}
        map_attributes{|values| {:name => values[0], :colors => values[1], :count => values[2]} }
      end
    end

    it 'should know if a file is local' do
      @proxy.should be_local
      @proxy.should_not be_remote
    end

    it 'should know if a file is compressed' do
      @proxy.should_not be_compressed
    end
  end

  describe 'loading remote files' do
    # HTTP traffic is replayed from the recorded cassette rather than hitting the network.
    use_vcr_cassette 'remote-zipped-files'

    before :each do
      @proxy = ingest("https://www.ian.com/affiliatecenter/include/V2/ChainList.zip") do
        finder{|values| Dummy.new}
        map_attributes do |values|
          {:id => values[0], :name => values[1]}
        end
        compressed true
      end
    end

    it 'should know if a file is remote' do
      @proxy.should_not be_local
      @proxy.should be_remote
    end

    it 'should know if a file is compressed' do
      @proxy.should be_compressed
    end

    it 'should create a tempfile for remote files' do
      # File.exist? replaces the deprecated File.exists? alias (removed in Ruby 3.2).
      File.exist?( @proxy.document.path ).should be true
    end
  end

  pending '#sample'

  describe '#includes_header' do
    it 'should include a header' do
      ingest("./samples/flags.txt") do
        includes_header true
        finder{|values| Country.new}
        map_attributes{|values| {:name => values[0], :colors => values[1], :count => values[2]} }
      end.header.should == "Country|Colors|Count|Secrets"
    end

    it 'should not include a header' do
      ingest("./samples/flags.txt") do
        includes_header false
        finder{|values| Country.new}
        map_attributes{|values| {:name => values[0], :colors => values[1], :count => values[2]} }
      end.header.should be_nil
    end
  end

  describe '#before' do
    before :each do
      ingest("./samples/flags.txt") do
        includes_header true
        finder{|values| Country.new}
        map_attributes{|values| {:name => values[0], :colors => values[1], :count => values[2], :secrets => values[3]} }
        # The callback mutates the mapped attributes and hands them back.
        before{|attrs|
          attrs[:name].reverse!
          attrs
        }
      end
    end

    it 'should modify values in place when using a #before callback' do
      Country.first.name.should == 'rodavlaS lE'
    end
  end

  describe '#after' do
    before :each do
      @records = []
      ingest("./samples/flags.txt") do
        includes_header true
        finder{|values| Country.new}
        map_attributes{|values| {:name => values[0], :colors => values[1], :count => values[2], :secrets => values[3]} }
        # Collect every record handed to the callback.
        after{|record|
          @records << record
        }
      end
    end

    it 'should pass the current record to an #after callback' do
      @records.length.should be(11)
    end
  end

  describe '#processor' do
    before do
      ingest("./samples/flags.txt") do
        includes_header true
        finder{|values| Country.new}
        map_attributes{|values| {:name => values[0], :colors => values[1], :count => values[2], :secrets => values[3]} }
        # Custom persistence step: apply the mapped attributes, then force a fixed
        # `secrets` value through the direct writer before saving.
        processor{|attrs,record|
          record.update_attributes attrs
          record.secrets = "Squirrel Party"
          record.save
          record
        }
      end
    end

    it 'should use the optional #processor when provided' do
      Country.where(secrets: 'Squirrel Party').count.should be(11)
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ingestor/parser/xml'
|
3
|
+
|
4
|
+
# Top-level specs for the Ingestor module: version constant and parser lookup.
describe Ingestor do
  it "should have a version" do
    version = Ingestor::VERSION
    version.should_not be_nil
  end

  it 'should have plain text as the default parser' do
    # Each parser type symbol must resolve to exactly this parser class.
    expected_parsers = {
      :plain_text => Ingestor::Parser::PlainText,
      :xml        => Ingestor::Parser::Xml
    }

    expected_parsers.each do |type, parser_class|
      Ingestor.parser_for(type).should be(parser_class)
    end
  end

  it 'should raise an exception for a bogus parser type' do
    bogus_lookup = lambda { Ingestor.parser_for(:bogus) }
    bogus_lookup.should raise_exception
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'active_record'
|
3
|
+
|
4
|
+
# Send ActiveRecord's log output to a file.
ActiveRecord::Base.logger = ActiveSupport::BufferedLogger.new('log/test.log')

# Load connection settings from database.yml next to this file and connect using
# the entry named by ENV['db'], falling back to 'mysql'.
# File.read is used instead of File.open(...).read so no file handle is left open.
database_config_path = File.join(File.dirname(__FILE__), 'database.yml')
database_configs = YAML.load(File.read(database_config_path))
ActiveRecord::Base.establish_connection database_configs[ENV['db'] || 'mysql']

# Silence migration output.
ActiveRecord::Migration.verbose = false
|
8
|
+
|
9
|
+
# Schema used by the specs: a `countries` table and a `dummies` table.
class TestMigration < ActiveRecord::Migration
  class << self
    # Creates both tables, replacing any that already exist (:force => true).
    def up
      create_table(:countries, :force => true) do |table|
        table.string  :name
        table.string  :colors
        table.integer :count
        table.string  :secrets
      end

      create_table(:dummies, :force => true) do |table|
        table.string :name
        table.timestamps
      end
    end

    # Drops both tables, countries first.
    def down
      [:countries, :dummies].each { |table_name| drop_table table_name }
    end
  end
end
|
29
|
+
|
30
|
+
# Bare model with no custom behaviour.
class Dummy < ActiveRecord::Base
end

# Model whose `secrets` attribute is excluded from mass assignment.
class Country < ActiveRecord::Base
  attr_protected(:secrets)
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
sqlite3:
|
2
|
+
adapter: sqlite3
|
3
|
+
database: ":memory:"
|
4
|
+
postgresql:
|
5
|
+
adapter: postgresql
|
6
|
+
database: ingestor_test
|
7
|
+
username: ingestor
|
8
|
+
password: ingestor
|
9
|
+
min_messages: WARNING
|
10
|
+
mysql:
|
11
|
+
adapter: mysql
|
12
|
+
host: localhost
|
13
|
+
database: ingestor_test
|
14
|
+
username: ingestor
|
15
|
+
password: ingestor
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
|
4
|
+
require 'ingestor'
|
5
|
+
require 'vcr'
|
6
|
+
require 'orm/active_record'
|
7
|
+
|
8
|
+
# VCR setup: cassettes live under spec/cassettes and requests are stubbed via FakeWeb.
VCR.configure do |vcr|
  vcr.cassette_library_dir = 'spec/cassettes'
  vcr.stub_with(:fakeweb)
end
|
12
|
+
|
13
|
+
# RSpec setup: VCR macros, schema lifecycle, and per-example table cleanup.
RSpec.configure do |rspec|
  rspec.extend VCR::RSpec::Macros

  # Build the schema in before(:all) and drop it in after(:all).
  rspec.before(:all) { TestMigration.up }
  rspec.after(:all)  { TestMigration.down }

  # Empty both tables after every example so specs do not see each other's rows.
  rspec.after(:each) do
    [Country, Dummy].each(&:delete_all)
  end
end
|
metadata
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ingestor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Cory O'Daniel
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-02-22 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: docile
|
16
|
+
requirement: &70343549624980 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70343549624980
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rubyzip
|
27
|
+
requirement: &70343549624400 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70343549624400
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: thor
|
38
|
+
requirement: &70343549623860 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70343549623860
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: activesupport
|
49
|
+
requirement: &70343549623160 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 3.2.0
|
55
|
+
type: :runtime
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *70343549623160
|
58
|
+
description: Ingesting local and remote data files into ActiveRecord
|
59
|
+
email:
|
60
|
+
- github@coryodaniel.com
|
61
|
+
executables:
|
62
|
+
- ingest
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files: []
|
65
|
+
files:
|
66
|
+
- .gitignore
|
67
|
+
- Gemfile
|
68
|
+
- Guardfile
|
69
|
+
- LICENSE.txt
|
70
|
+
- README.md
|
71
|
+
- Rakefile
|
72
|
+
- bin/ingest
|
73
|
+
- examples/text_parsing.rb
|
74
|
+
- examples/xml_parsing.rb
|
75
|
+
- ingestor.gemspec
|
76
|
+
- lib/ingestor.rb
|
77
|
+
- lib/ingestor/dsl.rb
|
78
|
+
- lib/ingestor/parser/base.rb
|
79
|
+
- lib/ingestor/parser/csv.rb
|
80
|
+
- lib/ingestor/parser/json.rb
|
81
|
+
- lib/ingestor/parser/plain_text.rb
|
82
|
+
- lib/ingestor/parser/xml.rb
|
83
|
+
- lib/ingestor/proxy.rb
|
84
|
+
- lib/ingestor/tasks.rb
|
85
|
+
- lib/ingestor/version.rb
|
86
|
+
- samples/animals.csv
|
87
|
+
- samples/books.xml
|
88
|
+
- samples/colors.json
|
89
|
+
- samples/flags.txt
|
90
|
+
- samples/people.json
|
91
|
+
- spec/cassettes/remote-zipped-files.yml
|
92
|
+
- spec/lib/ingestor/dsl_spec.rb
|
93
|
+
- spec/lib/ingestor/parser/.DS_Store
|
94
|
+
- spec/lib/ingestor/parser/csv_spec.rb
|
95
|
+
- spec/lib/ingestor/parser/json_spec.rb
|
96
|
+
- spec/lib/ingestor/parser/plain_text_spec.rb
|
97
|
+
- spec/lib/ingestor/parser/xml_spec.rb
|
98
|
+
- spec/lib/ingestor/proxy_spec.rb
|
99
|
+
- spec/lib/ingestor_spec.rb
|
100
|
+
- spec/orm/active_record.rb
|
101
|
+
- spec/orm/database.example.yml
|
102
|
+
- spec/spec_helper.rb
|
103
|
+
homepage: http://github.com/coryodaniel/ingestor
|
104
|
+
licenses: []
|
105
|
+
post_install_message:
|
106
|
+
rdoc_options: []
|
107
|
+
require_paths:
|
108
|
+
- lib
|
109
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
110
|
+
none: false
|
111
|
+
requirements:
|
112
|
+
- - ! '>='
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '0'
|
115
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
116
|
+
none: false
|
117
|
+
requirements:
|
118
|
+
- - ! '>='
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0'
|
121
|
+
requirements: []
|
122
|
+
rubyforge_project:
|
123
|
+
rubygems_version: 1.8.10
|
124
|
+
signing_key:
|
125
|
+
specification_version: 3
|
126
|
+
summary: Ingesting local and remote data files into ActiveRecord
|
127
|
+
test_files:
|
128
|
+
- spec/cassettes/remote-zipped-files.yml
|
129
|
+
- spec/lib/ingestor/dsl_spec.rb
|
130
|
+
- spec/lib/ingestor/parser/.DS_Store
|
131
|
+
- spec/lib/ingestor/parser/csv_spec.rb
|
132
|
+
- spec/lib/ingestor/parser/json_spec.rb
|
133
|
+
- spec/lib/ingestor/parser/plain_text_spec.rb
|
134
|
+
- spec/lib/ingestor/parser/xml_spec.rb
|
135
|
+
- spec/lib/ingestor/proxy_spec.rb
|
136
|
+
- spec/lib/ingestor_spec.rb
|
137
|
+
- spec/orm/active_record.rb
|
138
|
+
- spec/orm/database.example.yml
|
139
|
+
- spec/spec_helper.rb
|