muzzy 0.1.13 → 0.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/muzzy +4 -52
- data/lib/muzzy.rb +2 -0
- data/lib/muzzy/column.rb +10 -0
- data/lib/muzzy/columns_generator.rb +41 -0
- data/lib/muzzy/database_adapters/adapter_base.rb +3 -3
- data/lib/muzzy/database_adapters/mysql_adapter.rb +16 -3
- data/lib/muzzy/header_detector.rb +7 -5
- data/lib/muzzy/util.rb +17 -7
- data/lib/muzzy/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d6d05258a12af946f1c15258b363540856fcdd32
|
4
|
+
data.tar.gz: 41d9d79203216ae03187d1abb1fae0aee0e72d11
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 36d777b102c5797d9c74fd7ca9121062949b22b01ca0c597a92ca28335b61de9c09f2843cb2aacb000329f6d2df77c44bc72d16020b6a4758e1efe2aaf0b9314
|
7
|
+
data.tar.gz: 048fdc85d6d4d68c4d3844ab76c7bd8fe5cf500d2bbe996d5e4fb3ad4b48e9d5122aaa7a2146b7f63098c0023fa6b44660a79ae6c9baf45cf9a7da5c7c16fdf2
|
data/exe/muzzy
CHANGED
@@ -28,7 +28,6 @@ if mysqlimport_cmd == ''
|
|
28
28
|
exit 1
|
29
29
|
end
|
30
30
|
|
31
|
-
|
32
31
|
options = {
|
33
32
|
file: '',
|
34
33
|
verbose: false,
|
@@ -136,55 +135,8 @@ if first_row_is_header == nil
|
|
136
135
|
end
|
137
136
|
end
|
138
137
|
|
139
|
-
|
140
|
-
|
141
|
-
first_row = first_row.map do |str|
|
142
|
-
std_out = Open3.capture2('echo', str)[0]
|
143
|
-
Open3.capture2(kakasi_path, '-Ja', '-Ha', '-Ka', '-Ea', '-i', 'utf8', '-o', 'utf8', stdin_data: std_out)[0]
|
144
|
-
end.map do |x|
|
145
|
-
# kakasi returns ko^do if 'コード' given so replace it to _
|
146
|
-
# space changes to _
|
147
|
-
x.chomp.strip.gsub(/[\^]/, '_').gsub(/\s/, '_')
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
|
-
# TODO ヘッダが空白含んでたりするやつとかをなんとかする
|
152
|
-
col_data_types = []
|
153
|
-
Cell = Struct.new(:type, :name)
|
154
|
-
if first_row_is_header
|
155
|
-
if first_row
|
156
|
-
col_data_types = first_row.map.with_index do |str, i|
|
157
|
-
if str.to_s.match(/\A[\d,]+\z/)
|
158
|
-
# number
|
159
|
-
Cell.new('integer', first_row[i])
|
160
|
-
else
|
161
|
-
Cell.new('text', first_row[i])
|
162
|
-
end
|
163
|
-
end
|
164
|
-
else
|
165
|
-
col_data_types = first_row.map.with_index do |str, i|
|
166
|
-
colname = first_row[i].gsub(/[,-]/, '')
|
167
|
-
if str.to_s.match(/_id/i) && str.to_s.match(/\A[\w]+\z/i)
|
168
|
-
# number
|
169
|
-
Cell.new('integer', colname)
|
170
|
-
else
|
171
|
-
Cell.new('text', colname)
|
172
|
-
end
|
173
|
-
end
|
174
|
-
end
|
175
|
-
else
|
176
|
-
# TODO not create table option
|
177
|
-
|
178
|
-
# first row is data(not header)
|
179
|
-
col_data_types = first_row.map.with_index do |str, i|
|
180
|
-
if str.to_s.match(/\A[\d,]+\z/)
|
181
|
-
# number
|
182
|
-
Cell.new('integer', "col#{i}")
|
183
|
-
else
|
184
|
-
Cell.new('text', "col#{i}")
|
185
|
-
end
|
186
|
-
end
|
187
|
-
end
|
138
|
+
column_generator = Muzzy::ColumnsGenerator.new(kakasi_path)
|
139
|
+
columns = column_generator.generate(first_row_is_header, first_row)
|
188
140
|
|
189
141
|
config = {
|
190
142
|
filepath: options[:file],
|
@@ -197,7 +149,7 @@ config = {
|
|
197
149
|
database_name: options[:mysql_config][:database],
|
198
150
|
}
|
199
151
|
|
200
|
-
db_adapter = Muzzy::
|
152
|
+
db_adapter = Muzzy::DatabaseAdapters::MysqlAdapter.new(
|
201
153
|
config,
|
202
154
|
verbose: options[:verbose]
|
203
155
|
)
|
@@ -214,7 +166,7 @@ table_name = filename.match(/\A(\w+)(\.\w+)?\z/)[1]
|
|
214
166
|
# confirm table
|
215
167
|
unless db_adapter.confirm_table(table_name)
|
216
168
|
# cannot confirm table so create table
|
217
|
-
unless db_adapter.create_table(table_name,
|
169
|
+
unless db_adapter.create_table(table_name, columns)
|
218
170
|
# error, cannot create table
|
219
171
|
exit 1
|
220
172
|
end
|
data/lib/muzzy.rb
CHANGED
@@ -2,6 +2,8 @@ require 'muzzy/version'
|
|
2
2
|
require_relative 'muzzy/util'
|
3
3
|
require_relative 'muzzy/filetype_detector'
|
4
4
|
require_relative 'muzzy/header_detector'
|
5
|
+
require_relative 'muzzy/column'
|
6
|
+
require_relative 'muzzy/columns_generator'
|
5
7
|
require_relative 'muzzy/database_adapters/adapter_base'
|
6
8
|
require_relative 'muzzy/database_adapters/mysql_adapter'
|
7
9
|
|
data/lib/muzzy/column.rb
ADDED
data/lib/muzzy/columns_generator.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
module Muzzy
|
2
|
+
# generate columns from first row data
|
3
|
+
class ColumnsGenerator
|
4
|
+
def initialize(kakasi_path)
|
5
|
+
@kakasi_path = kakasi_path
|
6
|
+
end
|
7
|
+
|
8
|
+
def generate(first_row_is_header, first_row)
|
9
|
+
columns = []
|
10
|
+
# convert header row to compatible with database table columns
|
11
|
+
# ref https://dev.mysql.com/doc/refman/5.6/ja/identifiers.html
|
12
|
+
if first_row_is_header
|
13
|
+
# create column names from japanese headers.
|
14
|
+
# trim invalid chars
|
15
|
+
# noop if data is already ascii
|
16
|
+
coulumn_names = first_row.map do |str|
|
17
|
+
std_out = Open3.capture2('echo', str)[0]
|
18
|
+
Open3.capture2(
|
19
|
+
@kakasi_path,
|
20
|
+
'-Ja', '-Ha', '-Ka', '-Ea', '-i', 'utf8', '-o', 'utf8',
|
21
|
+
stdin_data: std_out
|
22
|
+
)[0]
|
23
|
+
end.map do |x|
|
24
|
+
# kakasi returns ko^do if 'コード' given so replace it to _
|
25
|
+
# space changes to _
|
26
|
+
x.chomp.strip.gsub(/[\^]/, '_').gsub(/\s/, '_')
|
27
|
+
end || first_row.dup
|
28
|
+
|
29
|
+
columns = coulumn_names.map.with_index do |str, i|
|
30
|
+
Muzzy::Column.new('text', coulumn_names[i])
|
31
|
+
end
|
32
|
+
else
|
33
|
+
# first row is data (not header)
|
34
|
+
columns = first_row.map.with_index do |str, i|
|
35
|
+
Muzzy::Column.new('text', "col#{i}")
|
36
|
+
end
|
37
|
+
end
|
38
|
+
columns
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/muzzy/database_adapters/adapter_base.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
module Muzzy
|
2
|
-
module
|
2
|
+
module DatabaseAdapters
|
3
3
|
class AdapterBase
|
4
4
|
def confirm_database; end
|
5
5
|
def create_database; end
|
6
|
-
def confirm_table; end
|
7
|
-
def create_table; end
|
6
|
+
def confirm_table(table_name); end
|
7
|
+
def create_table(table_name, columns); end
|
8
8
|
def import; end
|
9
9
|
end
|
10
10
|
end
|
data/lib/muzzy/database_adapters/mysql_adapter.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
+
require 'nkf'
|
1
2
|
module Muzzy
|
2
|
-
module
|
3
|
+
module DatabaseAdapters
|
3
4
|
class MysqlAdapter < AdapterBase
|
4
5
|
def initialize(config, verbose: false)
|
5
6
|
@filepath = config[:filepath]
|
@@ -57,8 +58,8 @@ module Muzzy
|
|
57
58
|
end
|
58
59
|
|
59
60
|
# [Bool] true: table created, false: some error happened
|
60
|
-
def create_table(table_name,
|
61
|
-
create_table_sql = "CREATE TABLE #{table_name} (#{
|
61
|
+
def create_table(table_name, columns)
|
62
|
+
create_table_sql = "CREATE TABLE #{table_name} (#{columns.map{|x| "#{x.name} #{x.datatype}"}.join(', ')})"
|
62
63
|
create_table_cmd_list = [*mysql_cmd_list, @database_name, '-e', '"', "#{create_table_sql}", '"']
|
63
64
|
create_table_cmd = create_table_cmd_list.join(' ')
|
64
65
|
if @verbose
|
@@ -73,6 +74,7 @@ module Muzzy
|
|
73
74
|
cmds = [*mysqlimport_cmd_list, @database_name, '--local', @filepath]
|
74
75
|
cmds.push "--ignore-lines=#{option[:first_row_is_header] ? 1 : 0}"
|
75
76
|
cmds.push('--fields_enclosed_by="')
|
77
|
+
cmds.push("--lines-terminated-by=#{lines_terminated_by}")
|
76
78
|
|
77
79
|
if option[:fields_terminated_by]
|
78
80
|
cmds.push("--fields_terminated_by=#{option[:fields_terminated_by]}")
|
@@ -91,6 +93,17 @@ module Muzzy
|
|
91
93
|
|
92
94
|
private
|
93
95
|
|
96
|
+
def lines_terminated_by
|
97
|
+
std_out, _ = Open3.capture2('file', @filepath)
|
98
|
+
if std_out =~ /with\sCR\s/
|
99
|
+
return '\r'
|
100
|
+
end
|
101
|
+
if std_out =~ /with\sCRLF\s/
|
102
|
+
return '\r\n'
|
103
|
+
end
|
104
|
+
return '\n'
|
105
|
+
end
|
106
|
+
|
94
107
|
def mysql_cmd_list
|
95
108
|
return @mysql_cmd_list if defined?(@mysql_cmd_list)
|
96
109
|
@mysql_cmd_list = [@cmd_path] + common_param
|
data/lib/muzzy/header_detector.rb
CHANGED
@@ -5,7 +5,7 @@ module Muzzy
|
|
5
5
|
# nil: could not detect
|
6
6
|
def self.detect(rows)
|
7
7
|
first_row, second_row = rows || []
|
8
|
-
return nil if first_row.empty?
|
8
|
+
return nil if first_row.nil? || first_row.empty?
|
9
9
|
|
10
10
|
if first_row.any?{|str| str.to_s.match(/_id/i) }
|
11
11
|
return true
|
@@ -15,15 +15,17 @@ module Muzzy
|
|
15
15
|
return false
|
16
16
|
end
|
17
17
|
|
18
|
-
return nil if second_row.empty?
|
18
|
+
return nil if second_row.nil? || second_row.empty?
|
19
19
|
|
20
20
|
# I can't detect first_row is header or not, so guess now.
|
21
21
|
|
22
|
-
#
|
22
|
+
# header row is not contain numbers in most cases
|
23
23
|
first_row_number_count = first_row.select{|str| str.to_f > 0}.length
|
24
|
-
|
24
|
+
if first_row_number_count > 0
|
25
|
+
return false
|
26
|
+
end
|
25
27
|
|
26
|
-
# If number col count is different,
|
28
|
+
# If number col count is different, first_row is header.
|
27
29
|
if first_row_number_count != second_row.select{|x| x.to_f > 0}.count
|
28
30
|
return true
|
29
31
|
end
|
data/lib/muzzy/util.rb
CHANGED
@@ -1,19 +1,29 @@
|
|
1
1
|
require 'csv'
|
2
|
+
require 'nkf'
|
2
3
|
module Muzzy
|
3
4
|
class Util
|
4
5
|
def self.fetch_header_and_first_row(filepath, col_sep)
|
5
6
|
raise ArgumentError, "filepath required" if filepath.nil?
|
6
7
|
raise ArgumentError, "not found file" unless File.exists?(filepath)
|
7
8
|
header_row, first_row = nil, nil
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
9
|
+
|
10
|
+
from_fenc = 'UTF-8'
|
11
|
+
File.open(filepath, "rt") do |f|
|
12
|
+
s = f.readlines
|
13
|
+
from_fenc = NKF.guess(s.join).to_s
|
14
|
+
end
|
15
|
+
open(filepath, "rb:#{from_fenc}:UTF-8", undef: :replace) do |f|
|
16
|
+
CSV.new(f, col_sep: col_sep).each.with_index do |row, i|
|
17
|
+
if i == 0
|
18
|
+
header_row = row
|
19
|
+
elsif i == 1
|
20
|
+
first_row = row
|
21
|
+
else
|
22
|
+
break
|
23
|
+
end
|
15
24
|
end
|
16
25
|
end
|
26
|
+
|
17
27
|
return [header_row, first_row]
|
18
28
|
rescue ArgumentError => e
|
19
29
|
raise e
|
data/lib/muzzy/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: muzzy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.13
|
4
|
+
version: 0.1.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- vimtaku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-09-
|
11
|
+
date: 2018-09-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -90,6 +90,8 @@ files:
|
|
90
90
|
- exe/muzzy
|
91
91
|
- exe/muzzy_setup
|
92
92
|
- lib/muzzy.rb
|
93
|
+
- lib/muzzy/column.rb
|
94
|
+
- lib/muzzy/columns_generator.rb
|
93
95
|
- lib/muzzy/database_adapters/adapter_base.rb
|
94
96
|
- lib/muzzy/database_adapters/mysql_adapter.rb
|
95
97
|
- lib/muzzy/filetype_detector.rb
|