muzzy 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/muzzy +99 -163
- data/lib/muzzy/database_adapters/adapter_base.rb +11 -0
- data/lib/muzzy/database_adapters/mysql_adapter.rb +113 -0
- data/lib/muzzy/filetype_detector.rb +72 -0
- data/lib/muzzy/header_detector.rb +34 -0
- data/lib/muzzy/version.rb +1 -1
- data/lib/muzzy.rb +4 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0cb78304c4f64a469d72f3fc41d6482dbf0c3d78
|
4
|
+
data.tar.gz: 5241fc24a5787b37fa3057317a8535c95901ad80
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 541b75bbfacee9521fdf96e9029d1a765f520771482862b662b1b77e37b09dd634f2371f7fae4597a725f0d4ed98b517730000b9ca152a9be129f59920d516c7
|
7
|
+
data.tar.gz: b0efa731b1bbb93cfb5406bd2b2edb799831a9d5ea4f1f0e1fea8c5a95cc61a887259b1fdd3d79c8fa2e44fdb23290e89d5f899fec88b808ab17209db28c2e2b
|
data/exe/muzzy
CHANGED
@@ -12,9 +12,19 @@ Version = Muzzy::VERSION
|
|
12
12
|
## check environments
|
13
13
|
kakasi_path = File.absolute_path(File.expand_path('..', __FILE__) + "/../vendor/bin/kakasi")
|
14
14
|
unless File.exists?(kakasi_path)
|
15
|
-
puts "cannot find kakasi. please check kakasi installation is successed."
|
16
|
-
puts "expect path: #{kakasi_path}"
|
17
|
-
puts "If you have not execute `muzzy_setup` yet, please run `muzzy_setup` first."
|
15
|
+
$stderr.puts "cannot find kakasi. please check kakasi installation is successed."
|
16
|
+
$stderr.puts "expect path: #{kakasi_path}"
|
17
|
+
$stderr.puts "If you have not execute `muzzy_setup` yet, please run `muzzy_setup` first."
|
18
|
+
exit 1
|
19
|
+
end
|
20
|
+
mysql_cmd = `which mysql`.to_s.chomp
|
21
|
+
if mysql_cmd == ''
|
22
|
+
$stderr.puts "cannot find mysql client"
|
23
|
+
exit 1
|
24
|
+
end
|
25
|
+
mysqlimport_cmd = `which mysqlimport`.to_s.chomp
|
26
|
+
if mysqlimport_cmd == ''
|
27
|
+
$stderr.puts "cannot find mysqlimport command"
|
18
28
|
exit 1
|
19
29
|
end
|
20
30
|
|
@@ -33,25 +43,13 @@ options = {
|
|
33
43
|
},
|
34
44
|
}
|
35
45
|
|
36
|
-
mysql_cmd = `which mysql`.to_s.chomp
|
37
|
-
if mysql_cmd == ''
|
38
|
-
puts "cannot find mysql client"
|
39
|
-
exit 1
|
40
|
-
end
|
41
|
-
|
42
|
-
mysqlimport_cmd = `which mysqlimport`.to_s.chomp
|
43
|
-
if mysqlimport_cmd == ''
|
44
|
-
puts "cannot find mysqlimport command"
|
45
|
-
exit 1
|
46
|
-
end
|
47
|
-
|
48
46
|
def check_file(file)
|
49
47
|
if File.directory?(file)
|
50
|
-
puts "muzzy: #{file}: is directory"
|
48
|
+
$stderr.puts "muzzy: #{file}: is directory"
|
51
49
|
exit Errno::ENOENT::Errno
|
52
50
|
end
|
53
51
|
unless File.exists?(file)
|
54
|
-
puts "muzzy: #{file}: No such file or directory"
|
52
|
+
$stderr.puts "muzzy: #{file}: No such file or directory"
|
55
53
|
exit Errno::ENOENT::Errno
|
56
54
|
end
|
57
55
|
end
|
@@ -75,24 +73,23 @@ ARGV.options do |opt|
|
|
75
73
|
}
|
76
74
|
|
77
75
|
if ARGV[0].to_s.length > 0 && ARGV[0].to_s[0] != '-'
|
78
|
-
check_file(ARGV[0])
|
79
76
|
options[:file] = ARGV[0]
|
77
|
+
elsif ARGV.last.to_s.length > 0 && ARGV.last.to_s[0] != '-'
|
78
|
+
options[:file] = ARGV.last
|
80
79
|
else
|
81
|
-
opt.on('-f', '--file [FILEPATH]', 'path to target file') {|v|
|
82
|
-
check_file(v)
|
83
|
-
options[:file] = v
|
84
|
-
}
|
80
|
+
opt.on('-f', '--file [FILEPATH]', 'path to target file') {|v| options[:file] = v }
|
85
81
|
end
|
82
|
+
check_file(options[:file])
|
86
83
|
|
87
84
|
opt.on('-v', '--version') {
|
88
|
-
puts opt.ver
|
85
|
+
$stdout.puts opt.ver
|
89
86
|
exit
|
90
87
|
}
|
91
88
|
|
92
89
|
opt.parse!
|
93
90
|
|
94
91
|
if options[:file].nil? || options[:file] == ''
|
95
|
-
puts opt.help
|
92
|
+
$stdout.puts opt.help
|
96
93
|
exit 1
|
97
94
|
end
|
98
95
|
rescue => e
|
@@ -102,118 +99,44 @@ ARGV.options do |opt|
|
|
102
99
|
end
|
103
100
|
|
104
101
|
if options[:mysql_config][:database] !~ /\A(\w)+\z/
|
105
|
-
puts "illegal database"
|
102
|
+
$stderr.puts "illegal database name"
|
106
103
|
exit 1
|
107
104
|
end
|
108
105
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
end
|
114
|
-
|
115
|
-
confirm_database_cmd_list = mysql_cmd_list + ['-e', '"' + "SHOW DATABASES LIKE '#{options[:mysql_config][:database]}'" + '"']
|
116
|
-
if options[:verbose]
|
117
|
-
puts confirm_database_cmd_list.join(' ')
|
118
|
-
end
|
119
|
-
|
120
|
-
confirm_database_res = `#{confirm_database_cmd_list.join(' ')}`.chomp
|
121
|
-
if confirm_database_res == ''
|
122
|
-
puts "creating database #{options[:mysql_config][:database]}"
|
123
|
-
system(*mysql_cmd_list, '-e', "CREATE DATABASE #{options[:mysql_config][:database]}")
|
124
|
-
if $? == 0
|
125
|
-
puts "creating database #{options[:mysql_config][:database]} done"
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
## get first and second rows
|
130
|
-
header_row = []
|
131
|
-
first_row = [] # first row means first data row
|
132
|
-
fields_terminated_by = ","
|
133
|
-
|
134
|
-
|
135
|
-
if options[:file] =~ /\.tsv\z/
|
136
|
-
# tsv
|
137
|
-
header_row, first_row = Muzzy::Util.fetch_header_and_first_row(options[:file], "\t")
|
138
|
-
fields_terminated_by = "\t"
|
139
|
-
else
|
140
|
-
# csv(,) or csv(\t) or something
|
141
|
-
|
142
|
-
# trying to csv
|
143
|
-
csv_header_row, csv_first_row = Muzzy::Util.fetch_header_and_first_row(options[:file], ",")
|
144
|
-
# trying to tsv
|
145
|
-
tsv_header_row, tsv_first_row = Muzzy::Util.fetch_header_and_first_row(options[:file], "\t")
|
146
|
-
|
147
|
-
if csv_header_row == -1 && tsv_header_row == -1
|
148
|
-
puts "illegal csv file."
|
149
|
-
exit 1
|
150
|
-
end
|
151
|
-
|
152
|
-
if csv_header_row == -1 || tsv_header_row == -1
|
153
|
-
if csv_header_row == -1
|
154
|
-
# tsv
|
155
|
-
header_row, first_row = tsv_header_row, tsv_first_row
|
156
|
-
fields_terminated_by = "\t"
|
157
|
-
else
|
158
|
-
# csv
|
159
|
-
header_row, first_row = csv_header_row, csv_first_row
|
160
|
-
end
|
161
|
-
else
|
162
|
-
if csv_header_row.length > tsv_header_row.length
|
163
|
-
# csv
|
164
|
-
header_row, first_row = csv_header_row, csv_first_row
|
165
|
-
else
|
166
|
-
# tsv
|
167
|
-
header_row, first_row = tsv_header_row, tsv_first_row
|
168
|
-
fields_terminated_by = "\t"
|
169
|
-
end
|
170
|
-
if csv_header_row.length == tsv_header_row.length
|
171
|
-
if csv_header_row.length == 1 && tsv_first_row.length == 1
|
172
|
-
# single col file
|
173
|
-
header_row, first_row = csv_header_row, csv_first_row
|
174
|
-
else
|
175
|
-
puts "illegal file"
|
176
|
-
exit 1
|
177
|
-
end
|
178
|
-
end
|
179
|
-
end
|
106
|
+
filetype_detector = Muzzy::FiletypeDetector.new(options[:file])
|
107
|
+
if filetype_detector.unknown?
|
108
|
+
$stderr.puts "illegal file"
|
109
|
+
exit 1
|
180
110
|
end
|
181
|
-
header_row = header_row.map(&:to_s)
|
182
111
|
|
112
|
+
first_row = filetype_detector.first_row
|
113
|
+
second_row = filetype_detector.second_row
|
114
|
+
fields_terminated_by = filetype_detector.tsv? ? "\t" : ','
|
183
115
|
|
184
|
-
#
|
185
|
-
|
186
|
-
|
187
|
-
if header_row.any?{|str| str.match(/_id/) }
|
188
|
-
header_row_is_header = true
|
189
|
-
elsif header_row.any?{|str| str.match(/NULL/) }
|
190
|
-
header_row_is_header = false
|
191
|
-
elsif header_row.any?{|str| str.nil? || str == '' }
|
192
|
-
header_row_is_header = false
|
193
|
-
elsif header_row.map(&:to_i).select{|x| x <= 0}.count != first_row.map(&:to_i).select{|x| x <= 0}.count
|
194
|
-
# i guess number col count is different, header_row is header.
|
195
|
-
header_row_is_header = true
|
196
|
-
else
|
116
|
+
# detect first_row is header or data row
|
117
|
+
first_row_is_header = Muzzy::HeaderDetector.detect([first_row, second_row])
|
118
|
+
if first_row_is_header == nil
|
197
119
|
# cannot judge header is header or not
|
198
|
-
puts "
|
199
|
-
puts
|
200
|
-
puts "
|
120
|
+
puts "first row is"
|
121
|
+
puts first_row.join(',')
|
122
|
+
puts "first row is HEADER? [y/n]"
|
201
123
|
loop do
|
202
124
|
y_or_n = $stdin.gets.to_s.chomp
|
203
125
|
if y_or_n.match(/\A[Yy]\z/)
|
204
|
-
|
126
|
+
first_row_is_header = true
|
205
127
|
break
|
206
128
|
end
|
207
129
|
if y_or_n.match(/\A[Nn]\z/)
|
208
|
-
|
130
|
+
first_row_is_header = false
|
209
131
|
break
|
210
132
|
end
|
211
133
|
puts "plase enter y or n"
|
212
134
|
end
|
213
135
|
end
|
214
136
|
|
215
|
-
|
216
|
-
|
137
|
+
|
138
|
+
if first_row_is_header
|
139
|
+
first_row = first_row.map do |str|
|
217
140
|
std_out = Open3.capture2('echo', str)[0]
|
218
141
|
Open3.capture2(kakasi_path, '-Ja', '-Ha', '-Ka', '-Ea', '-i', 'utf8', '-o', 'utf8', stdin_data: std_out)[0]
|
219
142
|
end.map(&:chomp).map do |x|
|
@@ -222,19 +145,35 @@ if header_row_is_header
|
|
222
145
|
end
|
223
146
|
end
|
224
147
|
|
148
|
+
# TODO ヘッダが空白含んでたりするやつとかをなんとかする
|
149
|
+
col_data_types = []
|
225
150
|
Cell = Struct.new(:type, :name)
|
226
|
-
if
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
151
|
+
if first_row_is_header
|
152
|
+
if first_row
|
153
|
+
col_data_types = first_row.map.with_index do |str, i|
|
154
|
+
if str.to_s.match(/\A[\d,]+\z/)
|
155
|
+
# number
|
156
|
+
Cell.new('integer', first_row[i])
|
157
|
+
else
|
158
|
+
Cell.new('text', first_row[i])
|
159
|
+
end
|
160
|
+
end
|
161
|
+
else
|
162
|
+
col_data_types = first_row.map.with_index do |str, i|
|
163
|
+
colname = first_row[i].gsub(/[,-]/, '')
|
164
|
+
if str.to_s.match(/_id/i) && str.to_s.match(/\A[\w]+\z/i)
|
165
|
+
# number
|
166
|
+
Cell.new('integer', colname)
|
167
|
+
else
|
168
|
+
Cell.new('text', colname)
|
169
|
+
end
|
233
170
|
end
|
234
171
|
end
|
235
172
|
else
|
236
|
-
#
|
237
|
-
|
173
|
+
# TODO not create table option
|
174
|
+
|
175
|
+
# first row is data(not header)
|
176
|
+
col_data_types = first_row.map.with_index do |str, i|
|
238
177
|
if str.to_s.match(/\A[\d,]+\z/)
|
239
178
|
# number
|
240
179
|
Cell.new('integer', "col#{i}")
|
@@ -244,48 +183,45 @@ else
|
|
244
183
|
end
|
245
184
|
end
|
246
185
|
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
186
|
+
config = {
|
187
|
+
filepath: options[:file],
|
188
|
+
cmd_path: mysql_cmd,
|
189
|
+
mysqlimport_path: mysqlimport_cmd,
|
190
|
+
user: options[:mysql_config][:user],
|
191
|
+
host: options[:mysql_config][:host],
|
192
|
+
# password: options[:mysql_config][:password],
|
193
|
+
use_password: options[:mysql_config][:use_password],
|
194
|
+
database_name: options[:mysql_config][:database],
|
195
|
+
}
|
256
196
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
197
|
+
db_adapter = Muzzy::DatabaseAdapter::MysqlAdapter.new(
|
198
|
+
config,
|
199
|
+
verbose: options[:verbose]
|
200
|
+
)
|
201
|
+
unless db_adapter.confirm_database
|
202
|
+
unless db_adapter.create_database
|
203
|
+
# database create failed
|
204
|
+
exit 1
|
262
205
|
end
|
263
|
-
`#{create_database_cmd_list.join(' ')}`
|
264
206
|
end
|
265
207
|
|
266
|
-
|
267
|
-
|
268
|
-
mysql_options.push('-p')
|
269
|
-
end
|
270
|
-
cmds = [
|
271
|
-
mysqlimport_cmd,
|
272
|
-
*mysql_options,
|
273
|
-
options[:mysql_config][:database],
|
274
|
-
'--local', options[:file],
|
275
|
-
"--ignore-lines=#{header_row_is_header ? 1 : 0}",
|
276
|
-
]
|
277
|
-
if fields_terminated_by
|
278
|
-
cmds.push("--fields_terminated_by=#{fields_terminated_by}")
|
279
|
-
end
|
280
|
-
if options[:mysqlimport_config][:delete]
|
281
|
-
cmds.push('--delete')
|
282
|
-
end
|
283
|
-
cmds.push('--fields_enclosed_by="')
|
208
|
+
filename = File.basename(options[:file])
|
209
|
+
table_name = filename.match(/\A(\w+)(\.\w+)?\z/)[1]
|
284
210
|
|
285
|
-
|
286
|
-
|
211
|
+
# confirm table
|
212
|
+
unless db_adapter.confirm_table(table_name)
|
213
|
+
# cannot confirm table so create table
|
214
|
+
unless db_adapter.create_table(table_name, col_data_types)
|
215
|
+
# error, cannot create table
|
216
|
+
exit 1
|
217
|
+
end
|
287
218
|
end
|
288
219
|
|
289
|
-
#
|
290
|
-
|
220
|
+
# import
|
221
|
+
db_adapter.import(table_name, {
|
222
|
+
first_row_is_header: first_row_is_header,
|
223
|
+
fields_terminated_by: fields_terminated_by,
|
224
|
+
delete: options[:mysqlimport_config][:delete]
|
225
|
+
})
|
226
|
+
|
291
227
|
exit 0
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'open3'

module Muzzy
  module DatabaseAdapter
    # Drives the `mysql` and `mysqlimport` command line clients to make sure
    # a database and table exist and to load a data file into them.
    #
    # Every external command is started from an argument list (no shell), so
    # database, table and column names are handed to the client verbatim
    # instead of being re-interpreted by a shell.
    class MysqlAdapter < AdapterBase
      # @param config [Hash] settings read by this adapter:
      #   :filepath         - path of the file to import
      #   :cmd_path         - path of the `mysql` client
      #   :mysqlimport_path - path of the `mysqlimport` command
      #   :user, :host      - values passed as `-u` / `-h`
      #   :password         - stored, but not used by any method of this class
      #   :use_password     - when truthy, `-p` is added to every command
      #   :database_name    - target database
      # @param verbose [Boolean] when true, each command is echoed to $stderr
      #   before it runs
      def initialize(config, verbose: false)
        @filepath = config[:filepath]
        @cmd_path = config[:cmd_path]
        @import_cmd_path = config[:mysqlimport_path]
        @user = config[:user]
        @host = config[:host]
        @password = config[:password]
        @use_password = config[:use_password]
        @database_name = config[:database_name]
        @verbose = verbose
      end

      # Checks for the database with `SHOW DATABASES LIKE`.
      #
      # @return [Boolean] true when the client printed anything on stdout
      #   (the database exists), false otherwise
      def confirm_database
        argv = prepare(mysql_cmd_list + ['-e', "SHOW DATABASES LIKE '#{@database_name}'"])
        # IO.popen with an Array runs the client directly and, like the
        # backticks used before, leaves stdin and stderr attached to the caller.
        IO.popen(argv, &:read).to_s.chomp != ''
      end

      # Creates the configured database and reports progress on $stderr.
      #
      # @return [Boolean] true when the client exited successfully
      def create_database
        $stderr.puts "creating database #{@database_name}"

        argv = prepare([*mysql_cmd_list, '-e', "CREATE DATABASE #{@database_name}"])
        _std_out, status = Open3.capture2(*argv)

        # The exit status decides success; the client's own stderr is not
        # captured here, so its error text still reaches the user.
        if status.success?
          $stderr.puts "creating database #{@database_name} done"
        else
          $stderr.puts "error: could not create database #{@database_name}"
        end

        status.success?
      end

      # Checks for the table with `SHOW CREATE TABLE`.
      #
      # @param table_name [String]
      # @return [Boolean] true when the client exited successfully
      def confirm_table(table_name)
        argv = prepare([*mysql_cmd_list, @database_name, '-e', "SHOW CREATE TABLE #{table_name}"])
        # stdout and stderr are captured and dropped; only the status is used.
        _std_out, _std_err, status = Open3.capture3(*argv)
        status.success?
      end

      # Creates a table with one column per entry of col_data_types.
      #
      # @param table_name [String]
      # @param col_data_types [Array<#name, #type>] column descriptions; each
      #   contributes "<name> <type>" to the column list
      # @return [Boolean] true when the client exited successfully
      def create_table(table_name, col_data_types)
        columns = col_data_types.map { |col| "#{col.name} #{col.type}" }.join(', ')
        argv = prepare([*mysql_cmd_list, @database_name, '-e', "CREATE TABLE #{table_name} (#{columns})"])
        _std_out, status = Open3.capture2(*argv)
        status.success?
      end

      # Loads the configured file with `mysqlimport --local`.
      #
      # @param table_name [String] not used by this method
      #   NOTE(review): presumably mysqlimport picks the table from the file
      #   name - confirm.
      # @param option [Hash]
      #   :first_row_is_header  - truthy => `--ignore-lines=1`, otherwise 0
      #   :fields_terminated_by - separator passed through when present
      #   :delete               - truthy => `--delete`
      # @return [Boolean, nil] the result of Kernel#system
      def import(table_name, option)
        cmds = [*mysqlimport_cmd_list, @database_name, '--local', @filepath]
        cmds.push "--ignore-lines=#{option[:first_row_is_header] ? 1 : 0}"
        cmds.push('--fields_enclosed_by="')
        cmds.push("--fields_terminated_by=#{option[:fields_terminated_by]}") if option[:fields_terminated_by]
        cmds.push('--delete') if option[:delete]

        system(*prepare(cmds))
      end

      private

      # Turns a command list into an argv of Strings, echoing it to $stderr
      # when verbose so the user can see what is about to run.
      def prepare(cmd_list)
        argv = cmd_list.map(&:to_s)
        $stderr.puts argv.join(' ') if @verbose
        argv
      end

      # `mysql` client followed by the connection flags (memoized).
      def mysql_cmd_list
        @mysql_cmd_list ||= [@cmd_path] + common_param
      end

      # `mysqlimport` command followed by the connection flags (memoized).
      def mysqlimport_cmd_list
        @mysqlimport_cmd_list ||= [@import_cmd_path] + common_param
      end

      # Connection flags shared by both commands; `-p` is appended when
      # use_password was requested.
      def common_param
        list = ['-u', @user, '-h', @host]
        list.push('-p') if @use_password
        list
      end
    end
  end
end
|
@@ -1,4 +1,76 @@
|
|
1
1
|
module Muzzy
  # Reads the leading rows of a file through Muzzy::Util and decides whether
  # the file is comma separated ('csv'), tab separated ('tsv') or neither
  # ('unknown').
  #
  # Detection runs once, on first use of any reader or predicate, and its
  # result is cached. Muzzy::Util.fetch_header_and_first_row is compared
  # against -1 throughout, which this class treats as "could not be parsed".
  class FiletypeDetector
    attr_reader :filepath

    # @param filepath [String] path of the file to inspect
    def initialize(filepath)
      @filepath = filepath
    end

    # @return [Array, Integer] first row of the file, or -1 when the type is
    #   unknown
    def first_row
      detect
      @first_row
    end

    # @return [Array, Integer, nil] row following the first one, or -1 when
    #   the type is unknown
    def second_row
      detect
      @second_row
    end

    # @return [String] 'csv', 'tsv' or 'unknown'
    def filetype
      detect
      @filetype
    end

    # @return [Boolean]
    def tsv?
      filetype == 'tsv'
    end

    # @return [Boolean]
    def csv?
      filetype == 'csv'
    end

    # @return [Boolean]
    def unknown?
      filetype == 'unknown'
    end

    private

    # Truthy when the file name ends in ".tsv".
    def tsv_ext?
      File.basename(@filepath) =~ /\.tsv\z/
    end

    # Runs the classification once and stores the outcome.
    def detect
      return unless @filetype.nil?

      @first_row, @second_row, @filetype = classify
      nil
    end

    # Works out [first_row, second_row, filetype] for the file.
    def classify
      unknown = [-1, -1, 'unknown']

      if tsv_ext?
        header, row = Muzzy::Util.fetch_header_and_first_row(@filepath, "\t")
        # A .tsv file that cannot be parsed is not a usable tsv.
        return header == -1 ? unknown : [header, row, 'tsv']
      end

      # No telling extension: parse both ways and compare the results.
      csv_header_row, csv_first_row = Muzzy::Util.fetch_header_and_first_row(@filepath, ",")
      tsv_header_row, tsv_first_row = Muzzy::Util.fetch_header_and_first_row(@filepath, "\t")

      return unknown if csv_header_row == -1 && tsv_header_row == -1
      return [tsv_header_row, tsv_first_row, 'tsv'] if csv_header_row == -1
      return [csv_header_row, csv_first_row, 'csv'] if tsv_header_row == -1

      ## rare case: both separators produced a parse

      if csv_header_row.length == tsv_header_row.length
        # Same column count either way. A single column file is treated as
        # csv; anything else is ambiguous. This must be decided before the
        # length comparison below, otherwise it can never be reached.
        if csv_header_row.length == 1 && tsv_first_row.length == 1
          return [csv_header_row, csv_first_row, 'csv']
        end

        return unknown
      end

      # The separator that splits the header into more columns wins.
      if csv_header_row.length > tsv_header_row.length
        [csv_header_row, csv_first_row, 'csv']
      else
        [tsv_header_row, tsv_first_row, 'tsv']
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Muzzy
  # Guesses whether the first row of a data file is a header row.
  class HeaderDetector
    # Decides from the first two rows whether the first one is a header.
    #
    # Checks are applied in this order:
    # 1. any first-row cell containing "_id" (case-insensitive) => header
    # 2. any first-row cell containing "NULL"                   => data
    # 3. any blank first-row cell                               => data
    # 4. any first-row cell whose to_f is positive              => data
    # 5. any second-row cell whose to_f is positive             => header
    #
    # Steps 4 and 5 need a second row; without one the result is nil.
    #
    # @param rows [Array<Array>, nil] [first_row, second_row]; nil, a missing
    #   first row or a missing second row are all accepted
    # @return [true]  first row is a header
    # @return [false] first row is data
    # @return [nil]   could not decide
    def self.detect(rows)
      first_row, second_row = rows || []
      return nil if first_row.nil? || first_row.empty?

      cells = first_row.map(&:to_s)
      return true if cells.any? { |str| str =~ /_id/i }
      return false if cells.any? { |str| str =~ /NULL/ }
      return false if cells.any?(&:empty?)

      return nil if second_row.nil? || second_row.empty?

      # Nothing conclusive so far, so guess from where the numbers are.

      # A header row normally contains no numbers.
      return false if first_row.any? { |cell| cell.to_f > 0 }

      # Numbers only in the second row: the first row is likely a header.
      return true if second_row.any? { |cell| cell.to_f > 0 }

      nil
    end
  end
end
|
data/lib/muzzy/version.rb
CHANGED
data/lib/muzzy.rb
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
require 'muzzy/version'
|
2
2
|
require_relative 'muzzy/util'
|
3
|
+
require_relative 'muzzy/filetype_detector'
|
4
|
+
require_relative 'muzzy/header_detector'
|
5
|
+
require_relative 'muzzy/database_adapters/adapter_base'
|
6
|
+
require_relative 'muzzy/database_adapters/mysql_adapter'
|
3
7
|
|
4
8
|
module Muzzy
|
5
9
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: muzzy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- vimtaku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -90,7 +90,10 @@ files:
|
|
90
90
|
- exe/muzzy
|
91
91
|
- exe/muzzy_setup
|
92
92
|
- lib/muzzy.rb
|
93
|
+
- lib/muzzy/database_adapters/adapter_base.rb
|
94
|
+
- lib/muzzy/database_adapters/mysql_adapter.rb
|
93
95
|
- lib/muzzy/filetype_detector.rb
|
96
|
+
- lib/muzzy/header_detector.rb
|
94
97
|
- lib/muzzy/util.rb
|
95
98
|
- lib/muzzy/version.rb
|
96
99
|
- muzzy.gemspec
|