theman 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,25 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
22
+ spec/database.yml
23
+ log/
24
+ spec/fixtures/fucking_huge.csv
25
+ spec/fucking_huge_spec.rb
data/README.rdoc ADDED
@@ -0,0 +1,69 @@
1
+ = theman
2
+
3
+ The man getting you down?
4
+
5
+ FasterCSV is great and all, but when you get to 100 MB files it takes a while, and you may only be looking for certain records that match some criteria. Enter theman.
6
+
7
+ == Installation
8
+
9
+ config.gem 'theman'
10
+
11
+ Or
12
+
13
+ gem install 'theman'
14
+
15
+ == Basic Usage
16
+
17
+ cabinet = ::Theman::Agency.new 'pretty.csv'
18
+ temp_model = cabinet.instance
19
+ temp_model.count
20
+
21
+ == Advanced Usage
22
+
23
+ cabinet = ::Theman::Agency.new 'ugly.csv' do |cabinet|
24
+ cabinet.nulls /"N"/, /"UNKNOWN"/, /""/
25
+ cabinet.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
26
+ cabinet.table do |t|
27
+ t.date :date
28
+ t.integer :ext_id
29
+ t.float :amount
30
+ t.boolean :exited
31
+ end
32
+ end
33
+ temp_model = cabinet.instance
34
+ temp_model.where(:exited => true).count
35
+
36
+ In the above example we omitted the last 15 rows and made some things null.
37
+
38
+ If you do not provide a table block, your columns will be VARCHAR(255); you can cherry-pick columns to change their data types.
39
+
40
+ The temp table has no id column, but you could add one afterwards if you wanted.
41
+
42
+ == Troubles
43
+
44
+ Table empty? The man has given you crappy data and PostgreSQL has silently dissed your data.
45
+
46
+ == Copyright
47
+
48
+ (The MIT License)
49
+
50
+ Copyright (c) 2010 {mynameisrufus (Rufus Post)}[http://github.com/mynameisrufus]
51
+
52
+ Permission is hereby granted, free of charge, to any person obtaining
53
+ a copy of this software and associated documentation files (the
54
+ "Software"), to deal in the Software without restriction, including
55
+ without limitation the rights to use, copy, modify, merge, publish,
56
+ distribute, sublicense, and/or sell copies of the Software, and to
57
+ permit persons to whom the Software is furnished to do so, subject to
58
+ the following conditions:
59
+
60
+ The above copyright notice and this permission notice shall be
61
+ included in all copies or substantial portions of the Software.
62
+
63
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
64
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
65
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
66
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
67
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
68
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
69
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "theman"
8
+ gem.summary = %Q{PostgreSQL AR temporary table generator using PostgreSQL COPY}
9
+ gem.description = %Q{longer description of your gem}
10
+ gem.email = "rufuspost@gmail.com"
11
+ gem.homepage = "http://github.com/mynameisrufus/theman"
12
+ gem.authors = ["Rufus Post"]
13
+ gem.add_development_dependency "rspec", ">= 2.0.0.beta.20"
14
+ end
15
+ Jeweler::GemcutterTasks.new
16
+ rescue LoadError
17
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
18
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,109 @@
1
require 'shellwords'

module Theman
  # Builds an anonymous model class whose table name is a randomly generated
  # "c<10 digits>" identifier and, when given a CSV path, creates a temporary
  # table from the CSV header row and streams the file into it with
  # "COPY ... FROM STDIN WITH CSV HEADER".
  #
  # Configuration (nulls, seds, table column types, instance_parent) is done
  # in the block passed to +new+, which is yielded before the table is
  # created and the data is loaded.
  class Agency
    attr_reader :instance, :table_proc, :null_replacements, :stream_location, :column_names

    # args.first:: optional path of the CSV file to load. When it is nil the
    #              model class is still built, but no table is created and
    #              nothing is loaded.
    # Yields self (when a block is given) so the caller can configure the
    # import before it runs.
    def initialize(*args)
      # rand(10) so that every digit 0-9 can appear in the identifier.
      @cabinet_id = "c#{Array.new(10) { rand(10) }.join}"
      @column_names = {}
      @instance = build_instance
      yield self if block_given?
      return if args.first.nil?

      @stream_location = args.first
      create_table
      pipe_it
    end

    # Sets the superclass used for the generated model class (the default is
    # ::ActiveRecord::Base). The model class is rebuilt with the same table
    # name so that a call made inside the configuration block takes effect;
    # previously the class had already been created by the time the block
    # ran and the new parent was silently ignored.
    # Returns klass.
    def instance_parent(klass)
      @parent = klass
      @instance = build_instance
      klass
    end

    # Yields self so column types can be declared with the type methods
    # defined below (t.date :name, t.float :amount, ...).
    def table(&block)
      yield self if block_given?
    end

    # Stores the regexps whose matches are deleted from the stream by sed
    # before the data reaches COPY (see nulls_to_sed).
    def nulls(*args)
      @null_replacements = args
    end

    # Stores extra sed argument strings; each one becomes its own
    # "| sed ..." stage in the shell pipeline (see sed_to_s).
    def seds(*args)
      @extra_seds = args
    end

    # Converts a raw header cell into a column-name symbol: spaces become
    # underscores, every remaining non-word character (quotes, newlines, ...)
    # is removed, and the result is downcased.
    def symbolize(name)
      name.gsub(/ /, "_").gsub(/\W/, "").downcase.to_sym
    end

    # One public method per column type. Each takes a column name and an
    # optional options hash and records them through #column.
    %w( string text integer float decimal datetime timestamp time date binary boolean ).each do |column_type|
      define_method(column_type) do |*args|
        column(args[0], column_type, args[1].nil? ? {} : args[1])
      end
    end

    protected

    # Creates the anonymous model class. The heredoc is evaluated with
    # instance_eval on the new class, so table_name and inspect are defined
    # as class-level methods. cabinet_id is copied to a local because self
    # is the new class inside the Class.new block.
    def build_instance
      cabinet_id = @cabinet_id
      Class.new(parent) do
        instance_eval <<-EOV, __FILE__, __LINE__ + 1
          set_table_name "#{cabinet_id}"
          def table_name
            "#{cabinet_id}"
          end
          def inspect
            "Agent (#{cabinet_id})"
          end
        EOV
      end
    end

    # Returns the shell pipeline suffix ("| sed ... | sed ...") built from
    # the configured null replacements and extra seds, or an empty string
    # when neither was configured.
    def sed_to_s
      stages = []
      stages << "| sed #{nulls_to_sed.join(" ")}" unless @null_replacements.nil?
      stages << "| sed #{@extra_seds.join("| sed ")}" unless @extra_seds.nil?
      stages.join(" ")
    end

    # Returns one "-e 's/<regexp source>//g'" sed expression per null regexp.
    def nulls_to_sed
      @null_replacements.map do |null|
        "-e 's/#{null.source}//g'"
      end
    end

    # Superclass for the generated model class.
    def parent
      @parent ||= ::ActiveRecord::Base
    end

    # Creates the temporary, id-less table. Only the first line of the file
    # is read; it is split on commas and every cell becomes a column, typed
    # as declared through #column or as a plain string column otherwise.
    def create_table
      # Block form closes the file handle; an empty file yields no columns.
      header = File.open(stream_location, 'r') { |f| f.gets }
      instance.connection.create_table(instance.table_name, :temporary => true, :id => false) do |t|
        header.to_s.split(/,/).each do |col|
          column_name = symbolize(col)
          custom = @column_names[column_name]
          if custom
            t.column(*custom)
          else
            t.string column_name
          end
        end
      end
    end

    # Records the type and options declared for a column, keyed by name.
    def column(name, type, options)
      @column_names.merge!(name.to_sym => [name, type, options])
    end

    # Streams the file (through the optional sed stages) into the COPY
    # command in 8192-byte chunks, reusing +l+ as the read buffer.
    def pipe_it(l = "")
      raw = instance.connection.raw_connection
      raw.query "COPY #{instance.table_name} FROM STDIN WITH CSV HEADER"
      # The path is shell-escaped so names containing spaces or shell
      # metacharacters are passed to cat as a single literal argument.
      command = "cat #{Shellwords.escape(stream_location.to_s)} #{sed_to_s}"
      # IO#read(length, buffer) returns nil at end of file rather than
      # raising EOFError, so the block form is what closes the pipe.
      IO.popen(command) do |f|
        raw.put_copy_data l while f.read(8192, l)
      end
      raw.put_copy_end
    end
  end
end
data/lib/theman.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'theman/themans_agency'
2
+
3
+ module Theman
4
+ end
@@ -0,0 +1,5 @@
1
+ "COL DATE","COL TWO","COL THREE","COL FOUR","COL FIVE"
2
+ "05/12/2010","some \"text\"","123134343","Y","2.0"
3
+ "05/11/2010","some other \"text\"","987983134343","Y","11.5"
4
+ "05/11/2010","","N","","10.5"
5
+ "05/11/2010","","UNKNOWN","","3.0"
@@ -0,0 +1,21 @@
1
+ "DATE","ORG_CODE","JOB_SEEKER_ID"
2
+ "23/09/2010","XXXX","342310094609"
3
+ "23/09/2010","XXXX","234583366709"
4
+ "23/09/2010","XXXX","342939870209"
5
+ "23/09/2010","XXXX","234098040009"
6
+ "23/09/2010","XXXX","987919906509"
7
+
8
+ "@!!@"
9
+ "Subscription ID: XXXXXXXXXXXXXXXXXXXXXX"
10
+ "Managed By: XXXXX"
11
+ "Record Count: XXXXX"
12
+ "Report Date: XXXXXXX"
13
+ "Extract Date: XXXXXXX"
14
+ "Data Load Freq: DAILY"
15
+ "Report Name: XXXXX"
16
+ "Report Desc: XXX XXXX XXXXX XXX XXX XX XX"
17
+ "Report Message: "
18
+ "Solution Name: XXXXXX"
19
+ "Solution Desc: "
20
+ "REPORT CRITERIA BELOW"
21
+ "Organisation: XXXX"
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'rails'
4
+
5
+ require 'active_record'
6
+ Rails.env = 'test'
7
+ ActiveRecord::Base.configurations = YAML.load_file(File.join("spec", "database.yml"))
8
+ FileUtils.mkdir_p "#{Dir.pwd}/log"
9
+ logfile= File.new("#{Dir.pwd}/log/database.log", "w")
10
+ ActiveRecord::Base.logger = Logger.new(File.open(logfile, 'w'))
11
+ ActiveRecord::Base.establish_connection(ActiveRecord::Base.configurations.fetch(Rails.env))
12
+
13
+ require 'theman'
14
+ require 'rspec'
15
+ require 'rspec/autorun'
@@ -0,0 +1,91 @@
1
+ require 'spec_helper'
2
+
3
+ describe Theman::Agency, "instance object" do
4
+ before do
5
+ @instance = ::Theman::Agency.new.instance
6
+ end
7
+
8
+ it "should superclass active record" do
9
+ @instance.superclass.should == ActiveRecord::Base
10
+ end
11
+
12
+ it "should have connection" do
13
+ @instance.connection.class.should == ActiveRecord::ConnectionAdapters::PostgreSQLAdapter
14
+ end
15
+
16
+ it "should have a table name" do
17
+ @instance.table_name.should match /c[0-9]{10}/
18
+ end
19
+
20
+ it "should have an ispect method" do
21
+ @instance.inspect.should match /Agent/
22
+ end
23
+ end
24
+
25
+ describe Theman::Agency, "instance methods" do
26
+ it "should downcase and symbolize" do
27
+ Theman::Agency.new.symbolize("STRANGE NAME").should == :strange_name
28
+ end
29
+ end
30
+
31
+ describe Theman::Agency, "basic" do
32
+ before do
33
+ @csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_one.csv'))
34
+ @cabinet = ::Theman::Agency.new @csv
35
+ @instance = @cabinet.instance
36
+ end
37
+
38
+ it "should have all the records from the csv" do
39
+ @instance.count.should == 4
40
+ end
41
+ end
42
+
43
+ describe Theman::Agency, "sed chomp" do
44
+ before do
45
+ @csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_two.csv'))
46
+ @cabinet = ::Theman::Agency.new @csv do |cabinet|
47
+ cabinet.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
48
+ end
49
+ @instance = @cabinet.instance
50
+ end
51
+
52
+ it "should have all the records from the csv" do
53
+ @instance.count.should == 5
54
+ end
55
+ end
56
+
57
+ describe Theman::Agency, "data types" do
58
+ before do
59
+ @csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_one.csv'))
60
+ @cabinet = ::Theman::Agency.new @csv do |cabinet|
61
+ cabinet.nulls /"N"/, /"UNKNOWN"/, /""/
62
+ cabinet.table do |t|
63
+ t.date :col_date
64
+ t.boolean :col_four
65
+ t.float :col_five
66
+ end
67
+ end
68
+ @instance = @cabinet.instance
69
+ end
70
+
71
+ it "should create date col" do
72
+ @instance.first.col_date.class.should == Date
73
+ end
74
+
75
+ it "should create boolean col" do
76
+ @instance.where(:col_four => true).count.should == 2
77
+ end
78
+
79
+ it "should create float col" do
80
+ @instance.where("col_five > 10.0").count.should == 2
81
+ end
82
+
83
+ it "should have an array of nulls" do
84
+ @cabinet.null_replacements.should == [/"N"/, /"UNKNOWN"/, /""/]
85
+ end
86
+
87
+ it "should have nulls not strings" do
88
+ @instance.where(:col_two => nil).count.should == 2
89
+ @instance.where(:col_three => nil).count.should == 2
90
+ end
91
+ end
data/theman.gemspec ADDED
@@ -0,0 +1,55 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{theman}
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Rufus Post"]
12
+ s.date = %q{2010-09-28}
13
+ s.description = %q{longer description of your gem}
14
+ s.email = %q{rufuspost@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "README.rdoc"
17
+ ]
18
+ s.files = [
19
+ ".document",
20
+ ".gitignore",
21
+ "README.rdoc",
22
+ "Rakefile",
23
+ "VERSION",
24
+ "lib/theman.rb",
25
+ "lib/theman/themans_agency.rb",
26
+ "spec/fixtures/temp_one.csv",
27
+ "spec/fixtures/temp_two.csv",
28
+ "spec/spec_helper.rb",
29
+ "spec/theman_spec.rb",
30
+ "theman.gemspec"
31
+ ]
32
+ s.homepage = %q{http://github.com/mynameisrufus/theman}
33
+ s.rdoc_options = ["--charset=UTF-8"]
34
+ s.require_paths = ["lib"]
35
+ s.rubygems_version = %q{1.3.7}
36
+ s.summary = %q{PostgreSQL AR temporary table generator using PostgreSQL COPY}
37
+ s.test_files = [
38
+ "spec/theman_spec.rb",
39
+ "spec/spec_helper.rb"
40
+ ]
41
+
42
+ if s.respond_to? :specification_version then
43
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
+ s.specification_version = 3
45
+
46
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
47
+ s.add_development_dependency(%q<rspec>, [">= 2.0.0.beta.20"])
48
+ else
49
+ s.add_dependency(%q<rspec>, [">= 2.0.0.beta.20"])
50
+ end
51
+ else
52
+ s.add_dependency(%q<rspec>, [">= 2.0.0.beta.20"])
53
+ end
54
+ end
55
+
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: theman
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Rufus Post
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-09-28 00:00:00 +10:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 2
30
+ - 0
31
+ - 0
32
+ - beta
33
+ - 20
34
+ version: 2.0.0.beta.20
35
+ type: :development
36
+ version_requirements: *id001
37
+ description: longer description of your gem
38
+ email: rufuspost@gmail.com
39
+ executables: []
40
+
41
+ extensions: []
42
+
43
+ extra_rdoc_files:
44
+ - README.rdoc
45
+ files:
46
+ - .document
47
+ - .gitignore
48
+ - README.rdoc
49
+ - Rakefile
50
+ - VERSION
51
+ - lib/theman.rb
52
+ - lib/theman/themans_agency.rb
53
+ - spec/fixtures/temp_one.csv
54
+ - spec/fixtures/temp_two.csv
55
+ - spec/spec_helper.rb
56
+ - spec/theman_spec.rb
57
+ - theman.gemspec
58
+ has_rdoc: true
59
+ homepage: http://github.com/mynameisrufus/theman
60
+ licenses: []
61
+
62
+ post_install_message:
63
+ rdoc_options:
64
+ - --charset=UTF-8
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ segments:
81
+ - 0
82
+ version: "0"
83
+ requirements: []
84
+
85
+ rubyforge_project:
86
+ rubygems_version: 1.3.7
87
+ signing_key:
88
+ specification_version: 3
89
+ summary: PostgreSQL AR temporary table generator using PostgreSQL COPY
90
+ test_files:
91
+ - spec/theman_spec.rb
92
+ - spec/spec_helper.rb