pg_csv 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/README.md +7 -1
- data/benchmark/bench.rb +48 -0
- data/lib/pg_csv.rb +19 -34
- data/lib/pg_csv_version.rb +1 -1
- data/spec/pg_csv_spec.rb +11 -3
- metadata +85 -75
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -31,6 +31,7 @@ Options:
|
|
31
31
|
=> :gzip - save file to gzip
|
32
32
|
=> :stream - save to stream
|
33
33
|
=> :file - just save to file = default
|
34
|
+
=> :yield - yield each row to the given block
|
34
35
|
```
|
35
36
|
|
36
37
|
Examples:
|
@@ -38,7 +39,7 @@ Examples:
|
|
38
39
|
PgCsv.new(:sql => User.good.to_sql).export('a1.csv')
|
39
40
|
PgCsv.new(:sql => sql).export('a2.gz', :type => :gzip)
|
40
41
|
PgCsv.new(:sql => sql).export('a3.csv', :temp_file => true)
|
41
|
-
PgCsv.new(:sql => sql
|
42
|
+
PgCsv.new(:sql => sql, :type => :plain).export
|
42
43
|
File.open("a4.csv", 'a'){|f| PgCsv.new(:sql => "select * from users").\
|
43
44
|
export(f, :type => :stream) }
|
44
45
|
PgCsv.new(:sql => sql).export('a5.csv', :delimiter => "\t")
|
@@ -54,4 +55,9 @@ Zlib::GzipWriter.open('some.gz') do |stream|
|
|
54
55
|
e.export(stream, :connection => connection)
|
55
56
|
end
|
56
57
|
end
|
58
|
+
|
59
|
+
# yield example
|
60
|
+
PgCsv.new(:sql => sql, :type => :yield).export do |row|
|
61
|
+
puts row
|
62
|
+
end
|
57
63
|
```
|
data/benchmark/bench.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require "bundler"
|
3
|
+
Bundler.setup
|
4
|
+
|
5
|
+
$:.unshift(File.dirname(__FILE__) + '/../lib')
|
6
|
+
require 'pg_csv'
|
7
|
+
|
8
|
+
require 'benchmark'
|
9
|
+
require 'fileutils'
|
10
|
+
|
11
|
+
class PgCsv
|
12
|
+
def sql
|
13
|
+
""
|
14
|
+
end
|
15
|
+
|
16
|
+
def connection
|
17
|
+
1
|
18
|
+
end
|
19
|
+
|
20
|
+
def load_data
|
21
|
+
n = o(:times).to_i
|
22
|
+
c = 0
|
23
|
+
n.times do
|
24
|
+
c += 1
|
25
|
+
yield(@block["#{c},#{c*2},#{c * 249},#{rand(100)},#{rand(n)},blablabla,hahah,ahah,ahaha,ahahah,ah,1.55234143\n"])
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
filename = "./blah.test.file"
|
31
|
+
N = 500_000
|
32
|
+
|
33
|
+
tm = Benchmark.realtime{ PgCsv.new(:times => N, :type => :file).export(filename) }
|
34
|
+
puts "export file #{tm}"
|
35
|
+
|
36
|
+
tm = Benchmark.realtime{ PgCsv.new(:times => N, :type => :gzip).export(filename) }
|
37
|
+
puts "export gzip #{tm}"
|
38
|
+
|
39
|
+
tm = Benchmark.realtime{ PgCsv.new(:times => N, :type => :plain).export }
|
40
|
+
puts "export plain #{tm}"
|
41
|
+
|
42
|
+
tm = Benchmark.realtime{ File.open(filename, 'w'){|f| PgCsv.new(:times => N, :type => :stream).export(f) } }
|
43
|
+
puts "export stream #{tm}"
|
44
|
+
|
45
|
+
tm = Benchmark.realtime{ PgCsv.new(:times => N, :type => :yield).export{|row| row } }
|
46
|
+
puts "export yield #{tm}"
|
47
|
+
|
48
|
+
FileUtils.rm(filename) rescue nil
|
data/lib/pg_csv.rb
CHANGED
@@ -2,28 +2,13 @@ require 'active_record'
|
|
2
2
|
|
3
3
|
class PgCsv
|
4
4
|
|
5
|
-
# opts:
|
6
|
-
# :sql => "select u.*, p.* from users u, projects p where p.user_id = u.id order by email limit 100"
|
7
|
-
# :connection => AR.connection
|
8
|
-
# :delimiter => ["\t", ",", ]
|
9
|
-
# :header => boolean, use pg header for fields?
|
10
|
-
# :logger => logger
|
11
|
-
# :columns => manual array of column names, ignore :header option
|
12
|
-
|
13
|
-
# :temp_file => boolean, generate through a temp file; the final file appears via mv
|
14
|
-
# :temp_dir => path, ex: /tmp
|
15
|
-
|
16
|
-
# :type => :plain - return full string
|
17
|
-
# => :gzip - save file to gzip
|
18
|
-
# => :stream - save to stream
|
19
|
-
# => :file - just save to file * default
|
20
|
-
|
21
5
|
def initialize(opts = {})
|
22
6
|
@options = opts.symbolize_keys
|
23
7
|
end
|
24
8
|
|
25
9
|
# do export :to - filename or stream
|
26
|
-
def export(to, opts = {})
|
10
|
+
def export(to = nil, opts = {}, &block)
|
11
|
+
@block = block || Proc.new{|x|x}
|
27
12
|
@local_options = opts.symbolize_keys
|
28
13
|
|
29
14
|
raise ":connection should be" unless connection
|
@@ -37,7 +22,7 @@ class PgCsv
|
|
37
22
|
protected
|
38
23
|
|
39
24
|
def with_temp_file(to, use_temp_file, tmp_dir)
|
40
|
-
if use_temp_file
|
25
|
+
if use_temp_file && [:file, :gzip].include?(type)
|
41
26
|
check_str(to)
|
42
27
|
|
43
28
|
require 'fileutils'
|
@@ -70,6 +55,7 @@ protected
|
|
70
55
|
Zlib::GzipWriter.open(to, &exporter)
|
71
56
|
|
72
57
|
when :stream
|
58
|
+
raise "'to' should be" unless to
|
73
59
|
exporter[to]
|
74
60
|
|
75
61
|
when :plain
|
@@ -78,6 +64,10 @@ protected
|
|
78
64
|
exporter[sio]
|
79
65
|
result = sio.string
|
80
66
|
|
67
|
+
when :yield
|
68
|
+
# nothing is saved anywhere; each record is just yielded to the block
|
69
|
+
raise "block should be" unless @block
|
70
|
+
result = load_data{|_|}
|
81
71
|
end
|
82
72
|
|
83
73
|
info "<=== finished write #{to} in #{Time.now - start}"
|
@@ -94,22 +84,18 @@ protected
|
|
94
84
|
end
|
95
85
|
|
96
86
|
def export_to_stream(stream)
|
97
|
-
write_csv(stream)
|
87
|
+
count = write_csv(stream)
|
98
88
|
stream.flush if stream.respond_to?(:flush)
|
89
|
+
|
90
|
+
info "<= done exporting (#{count}) records."
|
99
91
|
end
|
100
92
|
|
101
93
|
def write_csv(stream)
|
102
|
-
count = 0
|
103
|
-
|
104
94
|
load_data do |row|
|
105
|
-
|
106
|
-
stream.write prepare_row(row)
|
95
|
+
stream.write(row)
|
107
96
|
end
|
108
|
-
|
109
|
-
info "<= done exporting (#{count}) records."
|
110
|
-
count
|
111
97
|
end
|
112
|
-
|
98
|
+
|
113
99
|
def load_data
|
114
100
|
info "#{query}"
|
115
101
|
raw = connection.raw_connection
|
@@ -119,14 +105,17 @@ protected
|
|
119
105
|
info "<= query"
|
120
106
|
|
121
107
|
info "=> write data"
|
122
|
-
yield(columns_str) if columns_str
|
123
|
-
|
108
|
+
yield(@block[columns_str]) if columns_str
|
109
|
+
|
110
|
+
count = 0
|
124
111
|
while row = raw.get_copy_data()
|
125
|
-
yield
|
112
|
+
yield(@block[row])
|
113
|
+
count += 1
|
126
114
|
end
|
127
115
|
info "<= write data"
|
128
116
|
|
129
117
|
q.clear
|
118
|
+
count
|
130
119
|
end
|
131
120
|
|
132
121
|
def query
|
@@ -140,10 +129,6 @@ DELIMITER '#{delimiter}'
|
|
140
129
|
SQL
|
141
130
|
end
|
142
131
|
|
143
|
-
def prepare_row(row)
|
144
|
-
row
|
145
|
-
end
|
146
|
-
|
147
132
|
def info(message)
|
148
133
|
logger.info(message) if logger
|
149
134
|
end
|
data/lib/pg_csv_version.rb
CHANGED
data/spec/pg_csv_spec.rb
CHANGED
@@ -114,6 +114,15 @@ describe PgCsv do
|
|
114
114
|
PgCsv.new(:sql => @sql, :type => :file).export(@name)
|
115
115
|
with_file(@name){|d| d.should == "4,5,6\n1,2,3\n" }
|
116
116
|
end
|
117
|
+
|
118
|
+
it "yield export" do
|
119
|
+
rows = []
|
120
|
+
PgCsv.new(:sql => @sql, :type => :yield).export do |row|
|
121
|
+
rows << row
|
122
|
+
end.should == 2
|
123
|
+
|
124
|
+
rows.should == ["4,5,6\n", "1,2,3\n"]
|
125
|
+
end
|
117
126
|
end
|
118
127
|
|
119
128
|
describe "integration specs" do
|
@@ -138,11 +147,10 @@ describe PgCsv do
|
|
138
147
|
it "custom prepare row" do
|
139
148
|
e = PgCsv.new(:sql => @sql)
|
140
149
|
|
141
|
-
|
150
|
+
e.export(@name) do |row|
|
142
151
|
row.split(",").join("-|-")
|
143
152
|
end
|
144
|
-
|
145
|
-
e.export(@name)
|
153
|
+
|
146
154
|
with_file(@name){|d| d.should == "4-|-5-|-6\n1-|-2-|-3\n" }
|
147
155
|
end
|
148
156
|
|
metadata
CHANGED
@@ -1,94 +1,95 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_csv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 3
|
10
|
+
version: 0.1.3
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Makarchev Konstantin
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
17
|
+
|
18
|
+
date: 2012-06-13 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
15
21
|
name: pg
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ! '>='
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: '0'
|
22
|
-
type: :runtime
|
23
22
|
prerelease: false
|
24
|
-
|
25
|
-
none: false
|
26
|
-
requirements:
|
27
|
-
- - ! '>='
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: '0'
|
30
|
-
- !ruby/object:Gem::Dependency
|
31
|
-
name: activerecord
|
32
|
-
requirement: !ruby/object:Gem::Requirement
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
33
24
|
none: false
|
34
|
-
requirements:
|
35
|
-
- -
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 3
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
version: "0"
|
38
32
|
type: :runtime
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: activerecord
|
39
36
|
prerelease: false
|
40
|
-
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
38
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
|
46
|
-
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
hash: 3
|
43
|
+
segments:
|
44
|
+
- 0
|
45
|
+
version: "0"
|
46
|
+
type: :runtime
|
47
|
+
version_requirements: *id002
|
48
|
+
- !ruby/object:Gem::Dependency
|
47
49
|
name: rspec
|
48
|
-
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
|
-
requirements:
|
51
|
-
- - ! '>='
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: '0'
|
54
|
-
type: :development
|
55
50
|
prerelease: false
|
56
|
-
|
57
|
-
none: false
|
58
|
-
requirements:
|
59
|
-
- - ! '>='
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
|
-
name: rake
|
64
|
-
requirement: !ruby/object:Gem::Requirement
|
51
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
65
52
|
none: false
|
66
|
-
requirements:
|
67
|
-
- -
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
hash: 3
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
version: "0"
|
70
60
|
type: :development
|
61
|
+
version_requirements: *id003
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rake
|
71
64
|
prerelease: false
|
72
|
-
|
65
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
73
66
|
none: false
|
74
|
-
requirements:
|
75
|
-
- -
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
type: :development
|
75
|
+
version_requirements: *id004
|
76
|
+
description: Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective on millions rows.
|
77
|
+
email:
|
81
78
|
- kostya27@gmail.com
|
82
79
|
executables: []
|
80
|
+
|
83
81
|
extensions: []
|
82
|
+
|
84
83
|
extra_rdoc_files: []
|
85
|
-
|
84
|
+
|
85
|
+
files:
|
86
86
|
- .gitignore
|
87
87
|
- Gemfile
|
88
88
|
- Gemfile.lock
|
89
89
|
- MIT-LICENSE
|
90
90
|
- README.md
|
91
91
|
- Rakefile
|
92
|
+
- benchmark/bench.rb
|
92
93
|
- lib/pg_csv.rb
|
93
94
|
- lib/pg_csv_version.rb
|
94
95
|
- pg_csv.gemspec
|
@@ -98,27 +99,36 @@ files:
|
|
98
99
|
- spec/tmp/.gitkeep
|
99
100
|
homepage: http://github.com/kostya/pg_csv
|
100
101
|
licenses: []
|
102
|
+
|
101
103
|
post_install_message:
|
102
104
|
rdoc_options: []
|
103
|
-
|
105
|
+
|
106
|
+
require_paths:
|
104
107
|
- lib
|
105
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
106
109
|
none: false
|
107
|
-
requirements:
|
108
|
-
- -
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
|
111
|
-
|
110
|
+
requirements:
|
111
|
+
- - ">="
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
hash: 3
|
114
|
+
segments:
|
115
|
+
- 0
|
116
|
+
version: "0"
|
117
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
118
|
none: false
|
113
|
-
requirements:
|
114
|
-
- -
|
115
|
-
- !ruby/object:Gem::Version
|
116
|
-
|
119
|
+
requirements:
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
hash: 3
|
123
|
+
segments:
|
124
|
+
- 0
|
125
|
+
version: "0"
|
117
126
|
requirements: []
|
127
|
+
|
118
128
|
rubyforge_project:
|
119
129
|
rubygems_version: 1.8.24
|
120
130
|
signing_key:
|
121
131
|
specification_version: 3
|
122
|
-
summary: Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective
|
123
|
-
on millions rows.
|
132
|
+
summary: Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective on millions rows.
|
124
133
|
test_files: []
|
134
|
+
|