tzispa_data 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/tzispa/data/transporter.rb +130 -0
- data/lib/tzispa/data/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9234402ed79a266e628d427704877a3e1a7286a
|
4
|
+
data.tar.gz: 52cde789079f0e62cddb344ebb1b7c612316156f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72bc1e0dd2b08fa542e3e758b3c6465da6b8c9b6017e82e114c9fb684b221e0876a595e4085541fdfc415c28952b70ab4b86a0b6ef91134f7881f6655c6271a6
|
7
|
+
data.tar.gz: 355d77a38353fe5af31d77927d3dff04f22e89337e6a3b105bc4dc94279b383dc1c134e1ef8a844dd97985bd5aecda3efc12e464af4ece17b2755a18c1589275
|
data/CHANGELOG.md
CHANGED
data/lib/tzispa/data/transporter.rb
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Tzispa
  module Data

    # Moves rows between a flat text file and a dataset object.
    #
    # Importing reads the file record by record (separator-terminated lines or
    # fixed-size records), turns every record into an array of values and hands
    # the values to the dataset in batches. Exporting writes hashes as delimited
    # lines while holding an exclusive lock on the file.
    #
    # The dataset given to #import must respond to +import(columns, rows)+,
    # +insert(columns, row)+ and +count+.
    # NOTE(review): presumably a Sequel dataset - confirm against callers.
    class Transporter

      DEFAULT_ENCODING = 'ASCII-8BIT'
      DEFAULT_BUFFER_SIZE = 2048
      DEFAULT_LINE_SEPARATOR = "\n"

      # +lines+ is the number of records read by the most recent #import
      # (0 before any import); +errors+ collects the per-row failure messages
      # recorded by the most recent #import.
      attr_reader :filename, :buffer_size, :encoding, :line_separator, :data_separator,
                  :line_size, :strip, :check_count, :lines, :errors

      # @param fn [String] path of the file to read from / write to
      # @param options [Hash] optional settings:
      #   :buffer_size    - rows per batch handed to the dataset (default 2048)
      #   :encoding       - external encoding used to open the file (default ASCII-8BIT)
      #   :line_separator - record terminator (default "\n")
      #   :line_size      - when set, records are read as fixed-size chunks of this many bytes
      #   :data_separator - field delimiter used for splitting and joining values
      #   :strip          - strip trailing whitespace from lines (or skip the
      #                     remainder of the line after a fixed-size record)
      #   :check_count    - make #import verify lines read against dataset.count
      def initialize(fn, options = {})
        @filename = fn
        @buffer_size = options[:buffer_size] || DEFAULT_BUFFER_SIZE
        @encoding = options[:encoding] || DEFAULT_ENCODING
        @line_separator = options[:line_separator] || DEFAULT_LINE_SEPARATOR
        @line_size = options[:line_size]
        @data_separator = options[:data_separator]
        @strip = options[:strip]
        @check_count = options[:check_count]
        @lines = 0
        @errors = []
      end

      # @return [Boolean] whether the target file exists
      def exist?
        File.exist? filename
      end

      # Reads the file and loads its records into +dataset+.
      #
      # When a block is given it receives each raw record and must return the
      # array of values; otherwise the record is split on +data_separator+.
      #
      # @param dataset [#import, #insert, #count] destination of the rows
      # @param columns [Array] column identifiers, one per value
      # @return [Array(Integer, Integer)] records read and dataset.count afterwards
      # @raise [TransporterBadFormat] when a record does not yield one value per column
      # @raise [TransporterRecordCount] when +check_count+ is set and the
      #   number of records read differs from dataset.count
      def import(dataset, columns)
        @lines = 0
        errors.clear
        buffer = []
        File.open(filename, "rb:#{encoding}") do |fh|
          while (line = read_line(fh, @lines))
            @lines += 1
            values = block_given? ? yield(line) : split_values(line)
            unless values.count == columns.count
              raise TransporterBadFormat,
                    "Bad file format at line #{@lines}: columns number does not match with values"
            end
            buffer << values
            flush_data(dataset, columns, buffer) if (@lines % buffer_size).zero?
          end
          # Push whatever is left from the last, partially filled batch.
          flush_data(dataset, columns, buffer)
        end
        ds_count = dataset.count
        if check_count && @lines != ds_count
          raise TransporterRecordCount,
                "Lines count (#{@lines}) and records count (#{ds_count}) does not match"
        end
        [@lines, ds_count]
      end

      # Writes +data+ to the file, one line per row, under an exclusive lock.
      #
      # When a block is given it receives each row and must return the line
      # content (without separator); otherwise the row's values are joined
      # with +data_separator+.
      #
      # @param data [Hash, #each] a single row or a collection of rows
      # @param append [Boolean] append to the file instead of truncating it
      # @return [Integer] number of lines written
      def export(data, append = false, &block)
        count = 0
        mode = append ? "ab:#{encoding}" : "wb:#{encoding}"
        File.open(filename, mode) do |fh|
          lock(fh, File::LOCK_EX) do |lfh|
            rows = data.is_a?(Hash) ? [data] : data
            rows.each do |row|
              lfh << build_line(row, &block)
              count += 1
            end
          end
        end
        count
      end

      private

      # Splits a record into its values.
      #
      # With an explicit separator the split keeps trailing empty fields
      # (limit -1): plain String#split drops them, which made rows such as
      # "a,b," fail the column-count check. Whitespace splitting (nil or " "
      # separator) is left as is, because a negative limit there would turn
      # the trailing line separator into an extra empty field.
      def split_values(line)
        return line.split(data_separator) if data_separator.nil? || data_separator == ' '

        line.split(data_separator, -1)
      end

      # Builds the output line for one row and appends the line separator.
      # String.new gives a mutable copy even when the content is frozen.
      def build_line(data, &block)
        content = block_given? ? yield(data) : data.values.join(data_separator)
        String.new(content) << line_separator
      end

      # Returns the next record, or nil at end of file.
      # +lines+ is the number of records already read (used in error messages).
      def read_line(fh, lines)
        return if fh.eof?

        line_size ? read_fixed_record(fh, lines) : read_separated_line(fh)
      end

      # Reads one fixed-size record. With +strip+ the rest of the line is
      # consumed too and must be blank, otherwise the file is malformed.
      def read_fixed_record(fh, lines)
        record = fh.read(line_size)
        remainder = fh.gets(line_separator) if strip && !fh.eof?
        unless remainder.nil? || remainder.strip.empty?
          raise TransporterBadFormat, "Bad file format at line #{lines + 1}"
        end
        record
      end

      # Reads one separator-terminated line.
      # NOTE: without +strip+ the returned line still ends with the separator,
      # so the last value keeps it.
      def read_separated_line(fh)
        line = fh.gets(line_separator)
        line.rstrip! if strip
        line
      end

      # Sends the buffered rows to the dataset in one batch; if the batch
      # fails, falls back to row-by-row inserts so only bad rows are lost.
      def flush_data(dataset, columns, buffer)
        return if buffer.empty?

        dataset.import(columns, buffer)
        buffer.clear
      rescue StandardError
        insert_by_row(dataset, columns, buffer)
      end

      # Inserts the buffered rows one at a time, recording each failure in
      # +errors+ instead of aborting. The buffer is always emptied.
      def insert_by_row(dataset, columns, buffer)
        buffer.each do |row|
          begin
            dataset.insert columns, row
          rescue StandardError => err
            errors << "#{err} in #{row.inspect}\n#{err.backtrace&.join("\n")}"
          end
        end
      ensure
        buffer.clear
      end

      # Yields +file+ while holding the requested lock and always releases it.
      # Returns nil without yielding when the lock could not be obtained
      # (File#flock returns false only for a non-blocking request).
      def lock(file, mode)
        return unless file.flock(mode)

        begin
          yield file
        ensure
          file.flock(File::LOCK_UN)
        end
      end

    end

    # Base class of every error raised by Transporter.
    class TransporterError < StandardError; end
    # Raised when a record of the file does not match the expected layout.
    class TransporterBadFormat < TransporterError; end
    # Raised when the records read and the dataset row count differ.
    class TransporterRecordCount < TransporterError; end

  end
end
|
data/lib/tzispa/data/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tzispa_data
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Juan Antonio Piñero
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sequel
|
@@ -52,6 +52,7 @@ files:
|
|
52
52
|
- lib/tzispa/data/adapter_pool.rb
|
53
53
|
- lib/tzispa/data/entity.rb
|
54
54
|
- lib/tzispa/data/repository.rb
|
55
|
+
- lib/tzispa/data/transporter.rb
|
55
56
|
- lib/tzispa/data/version.rb
|
56
57
|
- lib/tzispa_data.rb
|
57
58
|
homepage: https://github.com/japiber/tzispa_data
|