ingestor 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +21 -0
- data/Gemfile +18 -0
- data/Guardfile +11 -0
- data/LICENSE.txt +22 -0
- data/README.md +211 -0
- data/Rakefile +7 -0
- data/bin/ingest +73 -0
- data/examples/text_parsing.rb +56 -0
- data/examples/xml_parsing.rb +52 -0
- data/ingestor.gemspec +23 -0
- data/lib/ingestor.rb +37 -0
- data/lib/ingestor/dsl.rb +110 -0
- data/lib/ingestor/parser/base.rb +28 -0
- data/lib/ingestor/parser/csv.rb +8 -0
- data/lib/ingestor/parser/json.rb +8 -0
- data/lib/ingestor/parser/plain_text.rb +44 -0
- data/lib/ingestor/parser/xml.rb +37 -0
- data/lib/ingestor/proxy.rb +113 -0
- data/lib/ingestor/tasks.rb +15 -0
- data/lib/ingestor/version.rb +3 -0
- data/samples/animals.csv +7 -0
- data/samples/books.xml +32 -0
- data/samples/colors.json +30 -0
- data/samples/flags.txt +12 -0
- data/samples/people.json +26 -0
- data/spec/cassettes/remote-zipped-files.yml +186 -0
- data/spec/lib/ingestor/dsl_spec.rb +114 -0
- data/spec/lib/ingestor/parser/csv_spec.rb +5 -0
- data/spec/lib/ingestor/parser/json_spec.rb +5 -0
- data/spec/lib/ingestor/parser/plain_text_spec.rb +24 -0
- data/spec/lib/ingestor/parser/xml_spec.rb +25 -0
- data/spec/lib/ingestor/proxy_spec.rb +129 -0
- data/spec/lib/ingestor_spec.rb +19 -0
- data/spec/orm/active_record.rb +33 -0
- data/spec/orm/database.example.yml +15 -0
- data/spec/spec_helper.rb +21 -0
- metadata +139 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ingestor/parser/xml'
|
3
|
+
|
4
|
+
# Integration spec for the XML parser: ingests the bundled books sample and
# checks that the first stored Dummy carries the expected title.
describe Ingestor::Parser::Xml do
  describe '#process!' do
    before do
      # The proxy is built once per example; the '//book' XPath is handed to
      # the parser via parser_options (presumably one record per matching
      # node -- confirm against Ingestor::Parser::Xml).
      @proxy = ingest("./samples/books.xml") do
        parser :xml
        parser_options({
          xpath: '//book'
        })
        finder { |attrs| Dummy.new }
        # Only the title is mapped. The debug `puts values` that used to live
        # here was removed so the spec run no longer dumps every parsed record
        # to stdout.
        map_attributes { |values|
          { :name => values['book']['title'] }
        }
      end
    end

    it 'should be able to process an XML file' do
      Dummy.first.name.should eq "XML Developer's Guide"
    end
  end
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Builds the stock ingestor used by the proxy specs: reads the flags sample
# with its header enabled and maps the first three columns onto a new Country.
def default_test_ingestor
  ingest("./samples/flags.txt") do
    includes_header true
    finder { |_row| Country.new }
    map_attributes do |row|
      name, colors, count = row[0], row[1], row[2]
      { :name => name, :colors => colors, :count => count }
    end
  end
end
|
10
|
+
|
11
|
+
# Specs for Ingestor::Proxy: local vs. remote source detection, compression
# flag, header handling, and the #before / #after / #processor hooks.
describe Ingestor::Proxy do
  describe 'loading local files' do
    before :each do
      @proxy = ingest("./samples/flags.txt") do
        finder{|values| Country.new}
        map_attributes{|values| {:name => values[0], :colors => values[1], :count => values[2]} }
      end
    end

    it 'should know if a file is local' do
      @proxy.should be_local
      @proxy.should_not be_remote
    end

    it 'should know if a file is compressed' do
      @proxy.should_not be_compressed
    end
  end

  describe 'loading remote files' do
    # HTTP traffic for this group is replayed from the recorded cassette.
    use_vcr_cassette 'remote-zipped-files'

    before :each do
      @proxy = ingest("https://www.ian.com/affiliatecenter/include/V2/ChainList.zip") do
        finder{|values| Dummy.new}
        map_attributes do |values|
          {:id => values[0], :name => values[1]}
        end
        compressed true
      end
    end

    it 'should know if a file is remote' do
      @proxy.should_not be_local
      @proxy.should be_remote
    end

    it 'should know if a file is compressed' do
      @proxy.should be_compressed
    end

    it 'should create a tempfile for remote files' do
      # File.exist? replaces the deprecated File.exists? alias, which no
      # longer exists on Ruby 3.2+.
      File.exist?( @proxy.document.path ).should be true
    end
  end

  pending '#sample'

  describe '#includes_header' do
    it 'should include a header' do
      ingest("./samples/flags.txt") do
        includes_header true
        finder{|values| Country.new}
        map_attributes{|values| {:name => values[0], :colors => values[1], :count => values[2]} }
      end.header.should == "Country|Colors|Count|Secrets"
    end

    it 'should not include a header' do
      ingest("./samples/flags.txt") do
        includes_header false
        finder{|values| Country.new}
        map_attributes{|values| {:name => values[0], :colors => values[1], :count => values[2]} }
      end.header.should be_nil
    end
  end

  describe '#before' do
    before :each do
      ingest("./samples/flags.txt") do
        includes_header true
        finder{|values| Country.new}
        map_attributes{|values| {:name => values[0], :colors => values[1], :count => values[2], :secrets => values[3]} }
        # The callback mutates the mapped attributes and hands them back.
        before{|attrs|
          attrs[:name].reverse!
          attrs
        }
      end
    end

    it 'should modify values in place when using a #before callback' do
      Country.first.name.should == 'rodavlaS lE'
    end
  end

  describe '#after' do
    before :each do
      @records = []
      ingest("./samples/flags.txt") do
        includes_header true
        finder{|values| Country.new}
        map_attributes{|values| {:name => values[0], :colors => values[1], :count => values[2], :secrets => values[3]} }
        # Collect every record the callback receives so the example can count them.
        after{|record|
          @records << record
        }
      end
    end

    it 'should pass the current record to an #after callback' do
      @records.length.should be(11)
    end
  end

  describe '#processor' do
    before do
      ingest("./samples/flags.txt") do
        includes_header true
        finder{|values| Country.new}
        map_attributes{|values| {:name => values[0], :colors => values[1], :count => values[2], :secrets => values[3]} }
        # Custom processor: applies the mapped attributes, then overrides
        # `secrets` directly on the record before saving.
        processor{|attrs,record|
          record.update_attributes attrs
          record.secrets = "Squirrel Party"
          record.save
          record
        }
      end
    end

    it 'should use the optional #processor when provided' do
      Country.where(secrets: 'Squirrel Party').count.should be(11)
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ingestor/parser/xml'
|
3
|
+
|
4
|
+
# Top-level specs for the Ingestor module: version constant and parser lookup.
describe Ingestor do
  it "should have a version" do
    version = Ingestor::VERSION
    version.should_not be_nil
  end

  it 'should have plain text as the default parser' do
    # Each lookup runs before its expected constant is referenced, matching
    # the original evaluation order.
    plain_text_parser = Ingestor.parser_for(:plain_text)
    plain_text_parser.should be(Ingestor::Parser::PlainText)

    xml_parser = Ingestor.parser_for(:xml)
    xml_parser.should be(Ingestor::Parser::Xml)
  end

  it 'should raise an exception for a bogus parser type' do
    bogus_lookup = -> { Ingestor.parser_for(:bogus) }
    bogus_lookup.should raise_exception
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'active_record'
|
3
|
+
|
4
|
+
# Test-suite database bootstrap.
# ActiveRecord logging goes to log/test.log.
ActiveRecord::Base.logger = ActiveSupport::BufferedLogger.new('log/test.log')

# Connection settings come from database.yml next to this file; the section is
# chosen by the `db` environment variable and falls back to 'mysql'.
# File.read is used instead of File.open(...).read so the file handle is
# closed as soon as the contents have been read.
db_config_path = File.join(File.dirname(__FILE__), 'database.yml')
ActiveRecord::Base.establish_connection YAML.load(File.read(db_config_path))[ENV['db'] || 'mysql']

# Silence migration output while the suite creates and drops its tables.
ActiveRecord::Migration.verbose = false
|
8
|
+
|
9
|
+
# Schema used by the specs: a `countries` table and a `dummies` table, both
# recreated from scratch by .up and removed again by .down.
class TestMigration < ActiveRecord::Migration
  class << self
    # Creates both tables, replacing any existing ones (:force => true).
    def up
      create_table(:countries, :force => true) do |table|
        table.column(:name, :string)
        table.column(:colors, :string)
        table.column(:count, :integer)
        table.column(:secrets, :string)
      end

      create_table(:dummies, :force => true) do |table|
        table.column(:name, :string)
        table.timestamps
      end
    end

    # Drops the tables in the same order the original migration did.
    def down
      drop_table(:countries)
      drop_table(:dummies)
    end
  end
end
|
29
|
+
|
30
|
+
# Minimal model over the `dummies` table; the specs use it as a generic
# ingestion target.
class Dummy < ActiveRecord::Base
end
|
31
|
+
# Model over the `countries` table. `secrets` is declared attr_protected, so
# it is presumably excluded from mass assignment -- the #processor spec sets
# it through the explicit writer instead.
class Country < ActiveRecord::Base
  attr_protected(:secrets)
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
sqlite3:
|
2
|
+
adapter: sqlite3
|
3
|
+
database: ":memory:"
|
4
|
+
postgresql:
|
5
|
+
adapter: postgresql
|
6
|
+
database: ingestor_test
|
7
|
+
username: ingestor
|
8
|
+
password: ingestor
|
9
|
+
min_messages: WARNING
|
10
|
+
mysql:
|
11
|
+
adapter: mysql
|
12
|
+
host: localhost
|
13
|
+
database: ingestor_test
|
14
|
+
username: ingestor
|
15
|
+
password: ingestor
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
|
4
|
+
require 'ingestor'
|
5
|
+
require 'vcr'
|
6
|
+
require 'orm/active_record'
|
7
|
+
|
8
|
+
# VCR setup: cassettes live under spec/cassettes and HTTP is intercepted via
# FakeWeb. `hook_into` is the current name for this setting; `stub_with` is
# its deprecated predecessor in VCR 2.x.
VCR.configure do |c|
  c.cassette_library_dir = 'spec/cassettes'
  c.hook_into :fakeweb
end
|
12
|
+
|
13
|
+
# Global RSpec hooks: expose the VCR macros, run the test migration around
# each :all scope, and empty both tables after every example.
RSpec.configure do |config|
  config.extend(VCR::RSpec::Macros)

  config.before(:all) do
    TestMigration.up
  end

  config.after(:all) do
    TestMigration.down
  end

  config.after(:each) do
    Country.delete_all
    Dummy.delete_all
  end
end
|
metadata
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ingestor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Cory O'Daniel
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-02-22 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: docile
|
16
|
+
requirement: &70343549624980 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70343549624980
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rubyzip
|
27
|
+
requirement: &70343549624400 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70343549624400
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: thor
|
38
|
+
requirement: &70343549623860 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70343549623860
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: activesupport
|
49
|
+
requirement: &70343549623160 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 3.2.0
|
55
|
+
type: :runtime
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *70343549623160
|
58
|
+
description: Ingesting local and remote data files into ActiveRecord
|
59
|
+
email:
|
60
|
+
- github@coryodaniel.com
|
61
|
+
executables:
|
62
|
+
- ingest
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files: []
|
65
|
+
files:
|
66
|
+
- .gitignore
|
67
|
+
- Gemfile
|
68
|
+
- Guardfile
|
69
|
+
- LICENSE.txt
|
70
|
+
- README.md
|
71
|
+
- Rakefile
|
72
|
+
- bin/ingest
|
73
|
+
- examples/text_parsing.rb
|
74
|
+
- examples/xml_parsing.rb
|
75
|
+
- ingestor.gemspec
|
76
|
+
- lib/ingestor.rb
|
77
|
+
- lib/ingestor/dsl.rb
|
78
|
+
- lib/ingestor/parser/base.rb
|
79
|
+
- lib/ingestor/parser/csv.rb
|
80
|
+
- lib/ingestor/parser/json.rb
|
81
|
+
- lib/ingestor/parser/plain_text.rb
|
82
|
+
- lib/ingestor/parser/xml.rb
|
83
|
+
- lib/ingestor/proxy.rb
|
84
|
+
- lib/ingestor/tasks.rb
|
85
|
+
- lib/ingestor/version.rb
|
86
|
+
- samples/animals.csv
|
87
|
+
- samples/books.xml
|
88
|
+
- samples/colors.json
|
89
|
+
- samples/flags.txt
|
90
|
+
- samples/people.json
|
91
|
+
- spec/cassettes/remote-zipped-files.yml
|
92
|
+
- spec/lib/ingestor/dsl_spec.rb
|
93
|
+
- spec/lib/ingestor/parser/.DS_Store
|
94
|
+
- spec/lib/ingestor/parser/csv_spec.rb
|
95
|
+
- spec/lib/ingestor/parser/json_spec.rb
|
96
|
+
- spec/lib/ingestor/parser/plain_text_spec.rb
|
97
|
+
- spec/lib/ingestor/parser/xml_spec.rb
|
98
|
+
- spec/lib/ingestor/proxy_spec.rb
|
99
|
+
- spec/lib/ingestor_spec.rb
|
100
|
+
- spec/orm/active_record.rb
|
101
|
+
- spec/orm/database.example.yml
|
102
|
+
- spec/spec_helper.rb
|
103
|
+
homepage: http://github.com/coryodaniel/ingestor
|
104
|
+
licenses: []
|
105
|
+
post_install_message:
|
106
|
+
rdoc_options: []
|
107
|
+
require_paths:
|
108
|
+
- lib
|
109
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
110
|
+
none: false
|
111
|
+
requirements:
|
112
|
+
- - ! '>='
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '0'
|
115
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
116
|
+
none: false
|
117
|
+
requirements:
|
118
|
+
- - ! '>='
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0'
|
121
|
+
requirements: []
|
122
|
+
rubyforge_project:
|
123
|
+
rubygems_version: 1.8.10
|
124
|
+
signing_key:
|
125
|
+
specification_version: 3
|
126
|
+
summary: Ingesting local and remote data files into ActiveRecord
|
127
|
+
test_files:
|
128
|
+
- spec/cassettes/remote-zipped-files.yml
|
129
|
+
- spec/lib/ingestor/dsl_spec.rb
|
130
|
+
- spec/lib/ingestor/parser/.DS_Store
|
131
|
+
- spec/lib/ingestor/parser/csv_spec.rb
|
132
|
+
- spec/lib/ingestor/parser/json_spec.rb
|
133
|
+
- spec/lib/ingestor/parser/plain_text_spec.rb
|
134
|
+
- spec/lib/ingestor/parser/xml_spec.rb
|
135
|
+
- spec/lib/ingestor/proxy_spec.rb
|
136
|
+
- spec/lib/ingestor_spec.rb
|
137
|
+
- spec/orm/active_record.rb
|
138
|
+
- spec/orm/database.example.yml
|
139
|
+
- spec/spec_helper.rb
|