theman 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,25 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
22
+ spec/database.yml
23
+ log/
24
+ spec/fixtures/fucking_huge.csv
25
+ spec/fucking_huge_spec.rb
data/README.rdoc ADDED
@@ -0,0 +1,69 @@
1
+ = theman
2
+
3
+ The man getting you down?
4
+
5
+ FasterCSV is great and all, but once you get to 100 MB files it takes a while, and you may only be looking for certain records that match some criteria. Enter theman.
6
+
7
+ == Installation
8
+
9
+ config.gem 'theman'
10
+
11
+ Or
12
+
13
+ gem install 'theman'
14
+
15
+ == Basic Usage
16
+
17
+ cabinet = ::Theman::Agency.new 'pretty.csv'
18
+ temp_model = cabinet.instance
19
+ temp_model.count
20
+
21
+ == Advanced Usage
22
+
23
+ cabinet = ::Theman::Agency.new 'ugly.csv' do |cabinet|
24
+ cabinet.nulls /"N"/, /"UNKNOWN"/, /""/
25
+ cabinet.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
26
+ cabinet.table do |t|
27
+ t.date :date
28
+ t.integer :ext_id
29
+ t.float :amount
30
+ t.boolean :exited
31
+ end
32
+ end
33
+ temp_model = cabinet.instance
34
+ temp_model.where(:exited => true).count
35
+
36
+ In the above example we omitted the last 15 rows and made some things null.
37
+
38
+ If you do not provide a table block, your columns will be VARCHAR(255); you can cherry-pick the columns whose data types you want to change.
39
+
40
+ The temp table has no id column, but you could add one afterwards if you wanted.
41
+
42
+ == Troubles
43
+
44
+ Table empty? The man has given you crappy data and PostgreSQL has silently dissed your data.
45
+
46
+ == Copyright
47
+
48
+ (The MIT License)
49
+
50
+ Copyright (c) 2010 {mynameisrufus (Rufus Post)}[http://github.com/mynameisrufus]
51
+
52
+ Permission is hereby granted, free of charge, to any person obtaining
53
+ a copy of this software and associated documentation files (the
54
+ "Software"), to deal in the Software without restriction, including
55
+ without limitation the rights to use, copy, modify, merge, publish,
56
+ distribute, sublicense, and/or sell copies of the Software, and to
57
+ permit persons to whom the Software is furnished to do so, subject to
58
+ the following conditions:
59
+
60
+ The above copyright notice and this permission notice shall be
61
+ included in all copies or substantial portions of the Software.
62
+
63
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
64
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
65
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
66
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
67
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
68
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
69
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
require 'rubygems'
require 'rake'

# Packaging tasks come from Jeweler. When Jeweler cannot be loaded the tasks
# are simply not defined and an install hint is printed instead.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |spec|
    spec.name        = 'theman'
    spec.summary     = 'PostgreSQL AR temporary table generator using PostgreSQL COPY'
    spec.description = 'longer description of your gem'
    spec.email       = 'rufuspost@gmail.com'
    spec.homepage    = 'http://github.com/mynameisrufus/theman'
    spec.authors     = ['Rufus Post']
    spec.add_development_dependency('rspec', '>= 2.0.0.beta.20')
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts 'Jeweler (or a dependency) not available. Install it with: gem install jeweler'
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,109 @@
1
require 'shellwords'

module Theman
  # Loads a CSV file into a PostgreSQL temporary table and exposes that table
  # through an anonymous model class (see #instance).
  #
  # When a path is given to .new, the table is created from the first line of
  # the file (the header row) and the file is then streamed into the table
  # with COPY ... FROM STDIN. The optional block is yielded the agency before
  # anything is loaded, so null patterns, extra sed filters, column types and
  # the model's parent class can be configured there.
  class Agency
    # Names of the column helper methods generated below
    # (e.g. <tt>agency.date :created_on</tt>).
    COLUMN_TYPES = %w( string text integer float decimal datetime timestamp time date binary boolean ).freeze

    attr_reader :table_proc, :null_replacements, :stream_location, :column_names

    # args.first - path of the CSV file to load. When it is omitted or nil no
    #              table is created and nothing is copied.
    #
    # Yields self (if a block is given) before the table is created.
    def initialize(*args)
      # rand(10) so that every digit 0-9 can appear in the generated name.
      @cabinet_id = "c#{Array.new(10) { rand(10) }.join}"
      @column_names = {}
      # Configure first: the model class is only built on first access to
      # #instance, which lets #instance_parent called in the block take effect.
      yield self if block_given?
      instance
      return if args.first.nil?

      @stream_location = args.first
      create_table
      pipe_it
    end

    # The anonymous model class bound to the generated table name. It is built
    # on first access and memoized afterwards.
    def instance
      @instance ||= build_instance
    end

    # Sets the class the model inherits from (default: ActiveRecord::Base).
    # Only has an effect when called before #instance is first accessed.
    def instance_parent(klass)
      @parent = klass
    end

    # Yields self so column types can be declared with the helpers generated
    # from COLUMN_TYPES.
    def table(&block)
      yield self if block_given?
    end

    # Patterns (regexps, or plain strings) that are blanked out of the stream
    # by sed before it reaches COPY.
    def nulls(*args)
      @null_replacements = args
    end

    # Extra sed argument strings; each one becomes its own "| sed ..." stage.
    def seds(*args)
      @extra_seds = args
    end

    # Turns a raw header cell into a column name: spaces become underscores,
    # remaining non-word characters (quotes, line endings) are dropped and the
    # result is downcased and converted to a Symbol.
    def symbolize(name)
      name.gsub(/ /, "_").gsub(/\W/, "").downcase.to_sym
    end

    # Defines #string, #integer, #date ... Each takes (name, options = {}) and
    # records the type to use for that column when the table is created.
    COLUMN_TYPES.each do |column_type|
      define_method(column_type) do |*args|
        column(args[0], column_type, args[1].nil? ? {} : args[1])
      end
    end

    protected

    # Creates the anonymous model class for the generated table name.
    def build_instance
      cabinet_id = @cabinet_id
      Class.new(parent) do
        instance_eval <<-EOV, __FILE__, __LINE__ + 1
          set_table_name "#{cabinet_id}"
          def table_name
            "#{cabinet_id}"
          end
          def inspect
            "Agent (#{cabinet_id})"
          end
        EOV
      end
    end

    # Shell fragment with every sed stage to append after `cat`; an empty
    # string when neither nulls nor seds were configured.
    def sed_to_s
      stages = []
      stages << "| sed #{nulls_to_sed.join(" ")}" unless @null_replacements.nil?
      stages << "| sed #{@extra_seds.join("| sed ")}" unless @extra_seds.nil?
      stages.join(" ")
    end

    # One "-e 's/PATTERN//g'" sed expression per configured null pattern.
    def nulls_to_sed
      @null_replacements.map do |null|
        pattern = null.respond_to?(:source) ? null.source : null.to_s
        "-e 's/#{pattern}//g'"
      end
    end

    def parent
      @parent ||= ::ActiveRecord::Base
    end

    # Creates the temporary table, one column per cell of the file's first
    # line. Columns declared through the type helpers use the declared type,
    # all others are created as string columns.
    def create_table
      # Block form closes the file as soon as the header line has been read.
      header = File.open(stream_location, 'r') { |f| f.gets }
      instance.connection.create_table(instance.table_name, :temporary => true, :id => false) do |t|
        header.to_s.split(/,/).each do |col|
          column_name = symbolize(col)
          custom = @column_names[column_name]
          if custom
            t.column(*custom)
          else
            t.string column_name
          end
        end
      end
    end

    # Remembers the type and options to use for the named column.
    def column(name, type, options)
      @column_names.merge! name.to_sym => [name, type, options]
    end

    # Streams the file (through the configured sed stages) into the table in
    # 8 KB chunks via COPY ... FROM STDIN. +l+ is the reusable read buffer.
    def pipe_it(l = String.new)
      raw = instance.connection.raw_connection
      raw.query "COPY #{instance.table_name} FROM STDIN WITH CSV HEADER"
      # Escape the path so spaces or shell metacharacters cannot break (or
      # inject into) the command line.
      command = "cat #{Shellwords.escape(stream_location.to_s)} #{sed_to_s}"
      # Block form guarantees the pipe is closed; IO#read with a length
      # returns nil at end of file rather than raising EOFError.
      IO.popen(command) do |f|
        while f.read(8192, l)
          raw.put_copy_data l
        end
      end
      raw.put_copy_end
    end
  end
end
data/lib/theman.rb ADDED
@@ -0,0 +1,4 @@
1
# Entry point of the gem: pulls in the agency implementation.
require 'theman/themans_agency'

# Top-level namespace of the gem.
module Theman; end
@@ -0,0 +1,5 @@
1
+ "COL DATE","COL TWO","COL THREE","COL FOUR","COL FIVE"
2
+ "05/12/2010","some \"text\"","123134343","Y","2.0"
3
+ "05/11/2010","some other \"text\"","987983134343","Y","11.5"
4
+ "05/11/2010","","N","","10.5"
5
+ "05/11/2010","","UNKNOWN","","3.0"
@@ -0,0 +1,21 @@
1
+ "DATE","ORG_CODE","JOB_SEEKER_ID"
2
+ "23/09/2010","XXXX","342310094609"
3
+ "23/09/2010","XXXX","234583366709"
4
+ "23/09/2010","XXXX","342939870209"
5
+ "23/09/2010","XXXX","234098040009"
6
+ "23/09/2010","XXXX","987919906509"
7
+
8
+ "@!!@"
9
+ "Subscription ID: XXXXXXXXXXXXXXXXXXXXXX"
10
+ "Managed By: XXXXX"
11
+ "Record Count: XXXXX"
12
+ "Report Date: XXXXXXX"
13
+ "Extract Date: XXXXXXX"
14
+ "Data Load Freq: DAILY"
15
+ "Report Name: XXXXX"
16
+ "Report Desc: XXX XXXX XXXXX XXX XXX XX XX"
17
+ "Report Message: "
18
+ "Solution Name: XXXXXX"
19
+ "Solution Desc: "
20
+ "REPORT CRITERIA BELOW"
21
+ "Organisation: XXXX"
@@ -0,0 +1,15 @@
1
# Spec bootstrap: puts spec/ and lib/ on the load path, connects ActiveRecord
# to the "test" database described in database.yml and loads the gem + RSpec.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(spec_dir)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))

# Required explicitly rather than relying on Rails to have loaded them.
require 'yaml'
require 'fileutils'
require 'logger'
require 'rails'
require 'active_record'

Rails.env = 'test'

# Resolved next to this file so the specs do not depend on the current
# working directory.
ActiveRecord::Base.configurations = YAML.load_file(File.join(spec_dir, 'database.yml'))

log_dir = File.join(Dir.pwd, 'log')
FileUtils.mkdir_p(log_dir)
# Open the log exactly once; mode 'w' truncates the log of the previous run.
ActiveRecord::Base.logger = Logger.new(File.open(File.join(log_dir, 'database.log'), 'w'))
ActiveRecord::Base.establish_connection(ActiveRecord::Base.configurations.fetch(Rails.env))

require 'theman'
require 'rspec'
require 'rspec/autorun'
@@ -0,0 +1,91 @@
1
require 'spec_helper'

# Directory holding the CSV fixtures used below (expected next to this file).
FIXTURE_DIR = File.expand_path(File.join(File.dirname(__FILE__), 'fixtures'))

describe Theman::Agency, "instance object" do
  before do
    @instance = ::Theman::Agency.new.instance
  end

  it "should superclass active record" do
    @instance.superclass.should == ActiveRecord::Base
  end

  it "should have connection" do
    @instance.connection.class.should == ActiveRecord::ConnectionAdapters::PostgreSQLAdapter
  end

  it "should have a table name" do
    @instance.table_name.should match(/c[0-9]{10}/)
  end

  it "should have an inspect method" do
    @instance.inspect.should match(/Agent/)
  end
end

describe Theman::Agency, "instance methods" do
  it "should downcase and symbolize" do
    Theman::Agency.new.symbolize("STRANGE NAME").should == :strange_name
  end
end

describe Theman::Agency, "basic" do
  before do
    @csv = File.join(FIXTURE_DIR, 'temp_one.csv')
    @cabinet = ::Theman::Agency.new(@csv)
    @instance = @cabinet.instance
  end

  it "should have all the records from the csv" do
    @instance.count.should == 4
  end
end

describe Theman::Agency, "sed chomp" do
  before do
    @csv = File.join(FIXTURE_DIR, 'temp_two.csv')
    @cabinet = ::Theman::Agency.new(@csv) do |cabinet|
      # Drops the trailing report footer before the data reaches COPY.
      cabinet.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
    end
    @instance = @cabinet.instance
  end

  it "should have all the records from the csv" do
    @instance.count.should == 5
  end
end

describe Theman::Agency, "data types" do
  before do
    @csv = File.join(FIXTURE_DIR, 'temp_one.csv')
    @cabinet = ::Theman::Agency.new(@csv) do |cabinet|
      # Parenthesised so the regexp literals are not parsed ambiguously.
      cabinet.nulls(/"N"/, /"UNKNOWN"/, /""/)
      cabinet.table do |t|
        t.date :col_date
        t.boolean :col_four
        t.float :col_five
      end
    end
    @instance = @cabinet.instance
  end

  it "should create date col" do
    @instance.first.col_date.class.should == Date
  end

  it "should create boolean col" do
    @instance.where(:col_four => true).count.should == 2
  end

  it "should create float col" do
    @instance.where("col_five > 10.0").count.should == 2
  end

  it "should have an array of nulls" do
    @cabinet.null_replacements.should == [/"N"/, /"UNKNOWN"/, /""/]
  end

  it "should have nulls not strings" do
    @instance.where(:col_two => nil).count.should == 2
    @instance.where(:col_three => nil).count.should == 2
  end
end
data/theman.gemspec ADDED
@@ -0,0 +1,55 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{theman}
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Rufus Post"]
12
+ s.date = %q{2010-09-28}
13
+ s.description = %q{longer description of your gem}
14
+ s.email = %q{rufuspost@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "README.rdoc"
17
+ ]
18
+ s.files = [
19
+ ".document",
20
+ ".gitignore",
21
+ "README.rdoc",
22
+ "Rakefile",
23
+ "VERSION",
24
+ "lib/theman.rb",
25
+ "lib/theman/themans_agency.rb",
26
+ "spec/fixtures/temp_one.csv",
27
+ "spec/fixtures/temp_two.csv",
28
+ "spec/spec_helper.rb",
29
+ "spec/theman_spec.rb",
30
+ "theman.gemspec"
31
+ ]
32
+ s.homepage = %q{http://github.com/mynameisrufus/theman}
33
+ s.rdoc_options = ["--charset=UTF-8"]
34
+ s.require_paths = ["lib"]
35
+ s.rubygems_version = %q{1.3.7}
36
+ s.summary = %q{PostgreSQL AR temporary table generator using PostgreSQL COPY}
37
+ s.test_files = [
38
+ "spec/theman_spec.rb",
39
+ "spec/spec_helper.rb"
40
+ ]
41
+
42
+ if s.respond_to? :specification_version then
43
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
+ s.specification_version = 3
45
+
46
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
47
+ s.add_development_dependency(%q<rspec>, [">= 2.0.0.beta.20"])
48
+ else
49
+ s.add_dependency(%q<rspec>, [">= 2.0.0.beta.20"])
50
+ end
51
+ else
52
+ s.add_dependency(%q<rspec>, [">= 2.0.0.beta.20"])
53
+ end
54
+ end
55
+
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: theman
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Rufus Post
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-09-28 00:00:00 +10:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 2
30
+ - 0
31
+ - 0
32
+ - beta
33
+ - 20
34
+ version: 2.0.0.beta.20
35
+ type: :development
36
+ version_requirements: *id001
37
+ description: longer description of your gem
38
+ email: rufuspost@gmail.com
39
+ executables: []
40
+
41
+ extensions: []
42
+
43
+ extra_rdoc_files:
44
+ - README.rdoc
45
+ files:
46
+ - .document
47
+ - .gitignore
48
+ - README.rdoc
49
+ - Rakefile
50
+ - VERSION
51
+ - lib/theman.rb
52
+ - lib/theman/themans_agency.rb
53
+ - spec/fixtures/temp_one.csv
54
+ - spec/fixtures/temp_two.csv
55
+ - spec/spec_helper.rb
56
+ - spec/theman_spec.rb
57
+ - theman.gemspec
58
+ has_rdoc: true
59
+ homepage: http://github.com/mynameisrufus/theman
60
+ licenses: []
61
+
62
+ post_install_message:
63
+ rdoc_options:
64
+ - --charset=UTF-8
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ segments:
81
+ - 0
82
+ version: "0"
83
+ requirements: []
84
+
85
+ rubyforge_project:
86
+ rubygems_version: 1.3.7
87
+ signing_key:
88
+ specification_version: 3
89
+ summary: PostgreSQL AR temporary table generator using PostgreSQL COPY
90
+ test_files:
91
+ - spec/theman_spec.rb
92
+ - spec/spec_helper.rb