theman 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +25 -0
- data/README.rdoc +69 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/theman/themans_agency.rb +109 -0
- data/lib/theman.rb +4 -0
- data/spec/fixtures/temp_one.csv +5 -0
- data/spec/fixtures/temp_two.csv +21 -0
- data/spec/spec_helper.rb +15 -0
- data/spec/theman_spec.rb +91 -0
- data/theman.gemspec +55 -0
- metadata +92 -0
data/.document
ADDED
data/.gitignore
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
## MAC OS
|
2
|
+
.DS_Store
|
3
|
+
|
4
|
+
## TEXTMATE
|
5
|
+
*.tmproj
|
6
|
+
tmtags
|
7
|
+
|
8
|
+
## EMACS
|
9
|
+
*~
|
10
|
+
\#*
|
11
|
+
.\#*
|
12
|
+
|
13
|
+
## VIM
|
14
|
+
*.swp
|
15
|
+
|
16
|
+
## PROJECT::GENERAL
|
17
|
+
coverage
|
18
|
+
rdoc
|
19
|
+
pkg
|
20
|
+
|
21
|
+
## PROJECT::SPECIFIC
|
22
|
+
spec/database.yml
|
23
|
+
log/
|
24
|
+
spec/fixtures/fucking_huge.csv
|
25
|
+
spec/fucking_huge_spec.rb
|
data/README.rdoc
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
= theman
|
2
|
+
|
3
|
+
The man getting you down?
|
4
|
+
|
5
|
+
FasterCSV is great and all, but when you get to 100 MB files it takes a while, and you may only be looking for certain records that match some criteria. Enter theman.
|
6
|
+
|
7
|
+
== Installation
|
8
|
+
|
9
|
+
config.gem 'theman'
|
10
|
+
|
11
|
+
Or
|
12
|
+
|
13
|
+
gem install 'theman'
|
14
|
+
|
15
|
+
== Basic Usage
|
16
|
+
|
17
|
+
cabinet = ::Theman::Agency.new 'pretty.csv'
|
18
|
+
temp_model = cabinet.instance
|
19
|
+
temp_model.count
|
20
|
+
|
21
|
+
== Advanced Usage
|
22
|
+
|
23
|
+
cabinet = ::Theman::Agency.new 'ugly.csv' do |cabinet|
|
24
|
+
cabinet.nulls /"N"/, /"UNKNOWN"/, /""/
|
25
|
+
cabinet.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
|
26
|
+
cabinet.table do |t|
|
27
|
+
t.date :date
|
28
|
+
t.integer :ext_id
|
29
|
+
t.float :amount
|
30
|
+
t.boolean :exited
|
31
|
+
end
|
32
|
+
end
|
33
|
+
temp_model = cabinet.instance
|
34
|
+
temp_model.where(:exited => true).count
|
35
|
+
|
36
|
+
In the above example we omitted the last 15 rows and made some things null.
|
37
|
+
|
38
|
+
If you do not provide a table block, your columns will be VARCHAR(255); you can cherry-pick columns to change their data types.
|
39
|
+
|
40
|
+
The temp table has no id column, but you could add one afterwards if you wanted.
|
41
|
+
|
42
|
+
== Troubles
|
43
|
+
|
44
|
+
Table empty? The man has given you crappy data and PostgreSQL has silently dissed your data.
|
45
|
+
|
46
|
+
== Copyright
|
47
|
+
|
48
|
+
(The MIT License)
|
49
|
+
|
50
|
+
Copyright (c) 2010 {mynameisrufus (Rufus Post)}[http://github.com/mynameisrufus]
|
51
|
+
|
52
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
53
|
+
a copy of this software and associated documentation files (the
|
54
|
+
"Software"), to deal in the Software without restriction, including
|
55
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
56
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
57
|
+
permit persons to whom the Software is furnished to do so, subject to
|
58
|
+
the following conditions:
|
59
|
+
|
60
|
+
The above copyright notice and this permission notice shall be
|
61
|
+
included in all copies or substantial portions of the Software.
|
62
|
+
|
63
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
64
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
65
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
66
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
67
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
68
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
69
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "theman"
|
8
|
+
gem.summary = %Q{PostgreSQL AR temporary table generator using PostgreSQL COPY}
|
9
|
+
gem.description = %Q{longer description of your gem}
|
10
|
+
gem.email = "rufuspost@gmail.com"
|
11
|
+
gem.homepage = "http://github.com/mynameisrufus/theman"
|
12
|
+
gem.authors = ["Rufus Post"]
|
13
|
+
gem.add_development_dependency "rspec", ">= 2.0.0.beta.20"
|
14
|
+
end
|
15
|
+
Jeweler::GemcutterTasks.new
|
16
|
+
rescue LoadError
|
17
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
18
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
@@ -0,0 +1,109 @@
|
|
1
|
+
module Theman
|
2
|
+
class Agency
|
3
|
+
attr_reader :instance, :table_proc, :null_replacements, :stream_location, :column_names
|
4
|
+
|
5
|
+
def initialize(*args)
|
6
|
+
cabinet_id = "c#{10.times.map{rand(9)}.join}"
|
7
|
+
@column_names = {}
|
8
|
+
@instance = Class.new(parent) do
|
9
|
+
instance_eval <<-EOV, __FILE__, __LINE__ + 1
|
10
|
+
set_table_name "#{cabinet_id}"
|
11
|
+
def table_name
|
12
|
+
"#{cabinet_id}"
|
13
|
+
end
|
14
|
+
def inspect
|
15
|
+
"Agent (#{cabinet_id})"
|
16
|
+
end
|
17
|
+
EOV
|
18
|
+
end
|
19
|
+
yield self if block_given?
|
20
|
+
unless args.first.nil?
|
21
|
+
@stream_location = args.first
|
22
|
+
create_table
|
23
|
+
pipe_it
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def instance_parent(klass)
|
28
|
+
@parent = klass
|
29
|
+
end
|
30
|
+
|
31
|
+
def table(&block)
|
32
|
+
yield self if block_given?
|
33
|
+
end
|
34
|
+
|
35
|
+
def nulls(*args)
|
36
|
+
@null_replacements = args
|
37
|
+
end
|
38
|
+
|
39
|
+
def seds(*args)
|
40
|
+
@extra_seds = args
|
41
|
+
end
|
42
|
+
|
43
|
+
def symbolize(name)
|
44
|
+
name.gsub(/ /,"_").gsub(/\W/, "").downcase.to_sym
|
45
|
+
end
|
46
|
+
|
47
|
+
%w( string text integer float decimal datetime timestamp time date binary boolean ).each do |column_type|
|
48
|
+
class_eval <<-EOV, __FILE__, __LINE__ + 1
|
49
|
+
def #{column_type}(*args)
|
50
|
+
column(args[0], '#{column_type}', args[1].nil? ? {} : args[1])
|
51
|
+
end
|
52
|
+
EOV
|
53
|
+
end
|
54
|
+
|
55
|
+
protected
|
56
|
+
def sed_to_s
|
57
|
+
seds = []
|
58
|
+
seds << "| sed #{nulls_to_sed.join(" ")}" unless @null_replacements.nil?
|
59
|
+
seds << "| sed #{@extra_seds.join("| sed ")}" unless @extra_seds.nil?
|
60
|
+
return seds.join(" ") unless seds.empty?
|
61
|
+
end
|
62
|
+
|
63
|
+
def nulls_to_sed
|
64
|
+
@null_replacements.map do |null|
|
65
|
+
"-e 's/#{null.source}//g'"
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def parent
|
70
|
+
@parent ||= ::ActiveRecord::Base
|
71
|
+
end
|
72
|
+
|
73
|
+
def create_table
|
74
|
+
f = File.open(stream_location, 'r')
|
75
|
+
instance.connection.create_table(instance.table_name, :temporary => true, :id => false) do |t|
|
76
|
+
f.each_line do |line|
|
77
|
+
line.split(/,/).each do |col|
|
78
|
+
column_name = col.is_a?(Hash) ? col : symbolize(col)
|
79
|
+
if custom = @column_names.fetch(column_name, nil)
|
80
|
+
t.column(*custom)
|
81
|
+
else
|
82
|
+
t.string column_name
|
83
|
+
end
|
84
|
+
end
|
85
|
+
break
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def column(name, type, options)
|
91
|
+
@column_names.merge! name.to_sym => [name, type, options]
|
92
|
+
end
|
93
|
+
|
94
|
+
def pipe_it(l = "")
|
95
|
+
raw = instance.connection.raw_connection
|
96
|
+
raw.query "COPY #{instance.table_name} FROM STDIN WITH CSV HEADER"
|
97
|
+
command = "cat #{stream_location} #{sed_to_s}"
|
98
|
+
f = IO.popen(command)
|
99
|
+
begin
|
100
|
+
while f.read(8192, l)
|
101
|
+
raw.put_copy_data l
|
102
|
+
end
|
103
|
+
rescue EOFError
|
104
|
+
f.close
|
105
|
+
end
|
106
|
+
raw.put_copy_end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
data/lib/theman.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
"DATE","ORG_CODE","JOB_SEEKER_ID"
|
2
|
+
"23/09/2010","XXXX","342310094609"
|
3
|
+
"23/09/2010","XXXX","234583366709"
|
4
|
+
"23/09/2010","XXXX","342939870209"
|
5
|
+
"23/09/2010","XXXX","234098040009"
|
6
|
+
"23/09/2010","XXXX","987919906509"
|
7
|
+
|
8
|
+
"@!!@"
|
9
|
+
"Subscription ID: XXXXXXXXXXXXXXXXXXXXXX"
|
10
|
+
"Managed By: XXXXX"
|
11
|
+
"Record Count: XXXXX"
|
12
|
+
"Report Date: XXXXXXX"
|
13
|
+
"Extract Date: XXXXXXX"
|
14
|
+
"Data Load Freq: DAILY"
|
15
|
+
"Report Name: XXXXX"
|
16
|
+
"Report Desc: XXX XXXX XXXXX XXX XXX XX XX"
|
17
|
+
"Report Message: "
|
18
|
+
"Solution Name: XXXXXX"
|
19
|
+
"Solution Desc: "
|
20
|
+
"REPORT CRITERIA BELOW"
|
21
|
+
"Organisation: XXXX"
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
2
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
|
+
require 'rails'
|
4
|
+
|
5
|
+
require 'active_record'
|
6
|
+
Rails.env = 'test'
|
7
|
+
ActiveRecord::Base.configurations = YAML.load_file(File.join("spec", "database.yml"))
|
8
|
+
FileUtils.mkdir_p "#{Dir.pwd}/log"
|
9
|
+
logfile= File.new("#{Dir.pwd}/log/database.log", "w")
|
10
|
+
ActiveRecord::Base.logger = Logger.new(File.open(logfile, 'w'))
|
11
|
+
ActiveRecord::Base.establish_connection(ActiveRecord::Base.configurations.fetch(Rails.env))
|
12
|
+
|
13
|
+
require 'theman'
|
14
|
+
require 'rspec'
|
15
|
+
require 'rspec/autorun'
|
data/spec/theman_spec.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Theman::Agency, "instance object" do
|
4
|
+
before do
|
5
|
+
@instance = ::Theman::Agency.new.instance
|
6
|
+
end
|
7
|
+
|
8
|
+
it "should superclass active record" do
|
9
|
+
@instance.superclass.should == ActiveRecord::Base
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should have connection" do
|
13
|
+
@instance.connection.class.should == ActiveRecord::ConnectionAdapters::PostgreSQLAdapter
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should have a table name" do
|
17
|
+
@instance.table_name.should match /c[0-9]{10}/
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should have an ispect method" do
|
21
|
+
@instance.inspect.should match /Agent/
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe Theman::Agency, "instance methods" do
|
26
|
+
it "should downcase and symbolize" do
|
27
|
+
Theman::Agency.new.symbolize("STRANGE NAME").should == :strange_name
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe Theman::Agency, "basic" do
|
32
|
+
before do
|
33
|
+
@csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_one.csv'))
|
34
|
+
@cabinet = ::Theman::Agency.new @csv
|
35
|
+
@instance = @cabinet.instance
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should have all the records from the csv" do
|
39
|
+
@instance.count.should == 4
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
describe Theman::Agency, "sed chomp" do
|
44
|
+
before do
|
45
|
+
@csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_two.csv'))
|
46
|
+
@cabinet = ::Theman::Agency.new @csv do |cabinet|
|
47
|
+
cabinet.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
|
48
|
+
end
|
49
|
+
@instance = @cabinet.instance
|
50
|
+
end
|
51
|
+
|
52
|
+
it "should have all the records from the csv" do
|
53
|
+
@instance.count.should == 5
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
describe Theman::Agency, "data types" do
|
58
|
+
before do
|
59
|
+
@csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_one.csv'))
|
60
|
+
@cabinet = ::Theman::Agency.new @csv do |cabinet|
|
61
|
+
cabinet.nulls /"N"/, /"UNKNOWN"/, /""/
|
62
|
+
cabinet.table do |t|
|
63
|
+
t.date :col_date
|
64
|
+
t.boolean :col_four
|
65
|
+
t.float :col_five
|
66
|
+
end
|
67
|
+
end
|
68
|
+
@instance = @cabinet.instance
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should create date col" do
|
72
|
+
@instance.first.col_date.class.should == Date
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should create boolean col" do
|
76
|
+
@instance.where(:col_four => true).count.should == 2
|
77
|
+
end
|
78
|
+
|
79
|
+
it "should create float col" do
|
80
|
+
@instance.where("col_five > 10.0").count.should == 2
|
81
|
+
end
|
82
|
+
|
83
|
+
it "should have an array of nulls" do
|
84
|
+
@cabinet.null_replacements.should == [/"N"/, /"UNKNOWN"/, /""/]
|
85
|
+
end
|
86
|
+
|
87
|
+
it "should have nulls not strings" do
|
88
|
+
@instance.where(:col_two => nil).count.should == 2
|
89
|
+
@instance.where(:col_three => nil).count.should == 2
|
90
|
+
end
|
91
|
+
end
|
data/theman.gemspec
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{theman}
|
8
|
+
s.version = "0.0.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Rufus Post"]
|
12
|
+
s.date = %q{2010-09-28}
|
13
|
+
s.description = %q{longer description of your gem}
|
14
|
+
s.email = %q{rufuspost@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"README.rdoc"
|
17
|
+
]
|
18
|
+
s.files = [
|
19
|
+
".document",
|
20
|
+
".gitignore",
|
21
|
+
"README.rdoc",
|
22
|
+
"Rakefile",
|
23
|
+
"VERSION",
|
24
|
+
"lib/theman.rb",
|
25
|
+
"lib/theman/themans_agency.rb",
|
26
|
+
"spec/fixtures/temp_one.csv",
|
27
|
+
"spec/fixtures/temp_two.csv",
|
28
|
+
"spec/spec_helper.rb",
|
29
|
+
"spec/theman_spec.rb",
|
30
|
+
"theman.gemspec"
|
31
|
+
]
|
32
|
+
s.homepage = %q{http://github.com/mynameisrufus/theman}
|
33
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
34
|
+
s.require_paths = ["lib"]
|
35
|
+
s.rubygems_version = %q{1.3.7}
|
36
|
+
s.summary = %q{PostgreSQL AR temporary table generator using PostgreSQL COPY}
|
37
|
+
s.test_files = [
|
38
|
+
"spec/theman_spec.rb",
|
39
|
+
"spec/spec_helper.rb"
|
40
|
+
]
|
41
|
+
|
42
|
+
if s.respond_to? :specification_version then
|
43
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
44
|
+
s.specification_version = 3
|
45
|
+
|
46
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
47
|
+
s.add_development_dependency(%q<rspec>, [">= 2.0.0.beta.20"])
|
48
|
+
else
|
49
|
+
s.add_dependency(%q<rspec>, [">= 2.0.0.beta.20"])
|
50
|
+
end
|
51
|
+
else
|
52
|
+
s.add_dependency(%q<rspec>, [">= 2.0.0.beta.20"])
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
metadata
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: theman
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Rufus Post
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-09-28 00:00:00 +10:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 2
|
30
|
+
- 0
|
31
|
+
- 0
|
32
|
+
- beta
|
33
|
+
- 20
|
34
|
+
version: 2.0.0.beta.20
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
description: longer description of your gem
|
38
|
+
email: rufuspost@gmail.com
|
39
|
+
executables: []
|
40
|
+
|
41
|
+
extensions: []
|
42
|
+
|
43
|
+
extra_rdoc_files:
|
44
|
+
- README.rdoc
|
45
|
+
files:
|
46
|
+
- .document
|
47
|
+
- .gitignore
|
48
|
+
- README.rdoc
|
49
|
+
- Rakefile
|
50
|
+
- VERSION
|
51
|
+
- lib/theman.rb
|
52
|
+
- lib/theman/themans_agency.rb
|
53
|
+
- spec/fixtures/temp_one.csv
|
54
|
+
- spec/fixtures/temp_two.csv
|
55
|
+
- spec/spec_helper.rb
|
56
|
+
- spec/theman_spec.rb
|
57
|
+
- theman.gemspec
|
58
|
+
has_rdoc: true
|
59
|
+
homepage: http://github.com/mynameisrufus/theman
|
60
|
+
licenses: []
|
61
|
+
|
62
|
+
post_install_message:
|
63
|
+
rdoc_options:
|
64
|
+
- --charset=UTF-8
|
65
|
+
require_paths:
|
66
|
+
- lib
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
segments:
|
73
|
+
- 0
|
74
|
+
version: "0"
|
75
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
76
|
+
none: false
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
segments:
|
81
|
+
- 0
|
82
|
+
version: "0"
|
83
|
+
requirements: []
|
84
|
+
|
85
|
+
rubyforge_project:
|
86
|
+
rubygems_version: 1.3.7
|
87
|
+
signing_key:
|
88
|
+
specification_version: 3
|
89
|
+
summary: PostgreSQL AR temporary table generator using PostgreSQL COPY
|
90
|
+
test_files:
|
91
|
+
- spec/theman_spec.rb
|
92
|
+
- spec/spec_helper.rb
|