muzzy 0.1.13 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 70311bea7d537c03c4f4a1980a3333701752c0db
4
- data.tar.gz: 52a3d53769438167c321c5631cb7baa5e15da5b8
3
+ metadata.gz: d6d05258a12af946f1c15258b363540856fcdd32
4
+ data.tar.gz: 41d9d79203216ae03187d1abb1fae0aee0e72d11
5
5
  SHA512:
6
- metadata.gz: 86d2d36ca0fa3eb1cc41c8a3c604a5f910e9d8006ea941fe94f8a3c013fe41f18dbd2f1b764a7a708bdf04ccb0a54fe81ae6da4f1f8785c86c891ab82aa0739a
7
- data.tar.gz: 4ba9b910287dbe31f566a6d396cfad67ff44828d87173e96ca85b95b079b407c04bd2343377bc45f0b9a0e1b9a19130f5d2d45918c871271738d6190a66340f0
6
+ metadata.gz: 36d777b102c5797d9c74fd7ca9121062949b22b01ca0c597a92ca28335b61de9c09f2843cb2aacb000329f6d2df77c44bc72d16020b6a4758e1efe2aaf0b9314
7
+ data.tar.gz: 048fdc85d6d4d68c4d3844ab76c7bd8fe5cf500d2bbe996d5e4fb3ad4b48e9d5122aaa7a2146b7f63098c0023fa6b44660a79ae6c9baf45cf9a7da5c7c16fdf2
data/exe/muzzy CHANGED
@@ -28,7 +28,6 @@ if mysqlimport_cmd == ''
28
28
  exit 1
29
29
  end
30
30
 
31
-
32
31
  options = {
33
32
  file: '',
34
33
  verbose: false,
@@ -136,55 +135,8 @@ if first_row_is_header == nil
136
135
  end
137
136
  end
138
137
 
139
- # convert header row to compatible with database table columns
140
- if first_row_is_header
141
- first_row = first_row.map do |str|
142
- std_out = Open3.capture2('echo', str)[0]
143
- Open3.capture2(kakasi_path, '-Ja', '-Ha', '-Ka', '-Ea', '-i', 'utf8', '-o', 'utf8', stdin_data: std_out)[0]
144
- end.map do |x|
145
- # kakasi returns ko^do if 'コード' given so replace it to _
146
- # space changes to _
147
- x.chomp.strip.gsub(/[\^]/, '_').gsub(/\s/, '_')
148
- end
149
- end
150
-
151
- # TODO ヘッダが空白含んでたりするやつとかをなんとかする
152
- col_data_types = []
153
- Cell = Struct.new(:type, :name)
154
- if first_row_is_header
155
- if first_row
156
- col_data_types = first_row.map.with_index do |str, i|
157
- if str.to_s.match(/\A[\d,]+\z/)
158
- # number
159
- Cell.new('integer', first_row[i])
160
- else
161
- Cell.new('text', first_row[i])
162
- end
163
- end
164
- else
165
- col_data_types = first_row.map.with_index do |str, i|
166
- colname = first_row[i].gsub(/[,-]/, '')
167
- if str.to_s.match(/_id/i) && str.to_s.match(/\A[\w]+\z/i)
168
- # number
169
- Cell.new('integer', colname)
170
- else
171
- Cell.new('text', colname)
172
- end
173
- end
174
- end
175
- else
176
- # TODO not create table option
177
-
178
- # first row is data(not header)
179
- col_data_types = first_row.map.with_index do |str, i|
180
- if str.to_s.match(/\A[\d,]+\z/)
181
- # number
182
- Cell.new('integer', "col#{i}")
183
- else
184
- Cell.new('text', "col#{i}")
185
- end
186
- end
187
- end
138
+ column_generator = Muzzy::ColumnsGenerator.new(kakasi_path)
139
+ columns = column_generator.generate(first_row_is_header, first_row)
188
140
 
189
141
  config = {
190
142
  filepath: options[:file],
@@ -197,7 +149,7 @@ config = {
197
149
  database_name: options[:mysql_config][:database],
198
150
  }
199
151
 
200
- db_adapter = Muzzy::DatabaseAdapter::MysqlAdapter.new(
152
+ db_adapter = Muzzy::DatabaseAdapters::MysqlAdapter.new(
201
153
  config,
202
154
  verbose: options[:verbose]
203
155
  )
@@ -214,7 +166,7 @@ table_name = filename.match(/\A(\w+)(\.\w+)?\z/)[1]
214
166
  # confirm table
215
167
  unless db_adapter.confirm_table(table_name)
216
168
  # cannot confirm table so create table
217
- unless db_adapter.create_table(table_name, col_data_types)
169
+ unless db_adapter.create_table(table_name, columns)
218
170
  # error, cannot create table
219
171
  exit 1
220
172
  end
@@ -2,6 +2,8 @@ require 'muzzy/version'
2
2
  require_relative 'muzzy/util'
3
3
  require_relative 'muzzy/filetype_detector'
4
4
  require_relative 'muzzy/header_detector'
5
+ require_relative 'muzzy/column'
6
+ require_relative 'muzzy/columns_generator'
5
7
  require_relative 'muzzy/database_adapters/adapter_base'
6
8
  require_relative 'muzzy/database_adapters/mysql_adapter'
7
9
 
@@ -0,0 +1,10 @@
1
module Muzzy
  # Describes one column of the table that will be created for an imported
  # file: the SQL data type and the column name. Database adapters read both
  # values when building the CREATE TABLE statement.
  #
  # Columns are value objects: two columns with the same datatype and name
  # are equal and hash identically, so they can be compared and deduplicated.
  class Column
    attr_reader :name, :datatype

    # @param datatype [String] SQL data type of the column (e.g. 'text')
    # @param name [String] column name
    def initialize(datatype, name)
      @datatype = datatype
      @name = name
    end

    # @param other [Object]
    # @return [Boolean] true when other is a Column with the same datatype and name
    def ==(other)
      other.class == self.class &&
        other.datatype == datatype &&
        other.name == name
    end
    alias eql? ==

    # Kept consistent with #== so columns work as Hash keys and with Array#uniq.
    # @return [Integer]
    def hash
      [self.class, datatype, name].hash
    end
  end
end
@@ -0,0 +1,41 @@
1
require 'open3'

module Muzzy
  # Generates the table columns (Muzzy::Column objects) from the first row of
  # an input file. Every generated column uses the 'text' data type.
  class ColumnsGenerator
    # Arguments passed to the kakasi command: transliterate the input to
    # ASCII, reading and writing UTF-8.
    KAKASI_OPTIONS = %w[-Ja -Ha -Ka -Ea -i utf8 -o utf8].freeze
    DEFAULT_DATATYPE = 'text'
    private_constant :KAKASI_OPTIONS, :DEFAULT_DATATYPE

    # @param kakasi_path [String] path of the kakasi executable
    def initialize(kakasi_path)
      @kakasi_path = kakasi_path
    end

    # Builds one column per cell of the first row.
    #
    # When the first row is a header, each cell is converted to a name usable
    # as a table column (see https://dev.mysql.com/doc/refman/5.6/ja/identifiers.html).
    # Otherwise the row is data and columns are named col0, col1, ...
    #
    # @param first_row_is_header [Boolean] whether first_row holds column headers
    # @param first_row [Array<String, nil>, nil] cells of the first row
    # @return [Array<Muzzy::Column>] one column per cell (empty for a nil/empty row)
    def generate(first_row_is_header, first_row)
      Array(first_row).each_with_index.map do |cell, index|
        name = first_row_is_header ? column_name_for(cell, index) : "col#{index}"
        Muzzy::Column.new(DEFAULT_DATATYPE, name)
      end
    end

    private

    # Turns one header cell into a column name. Falls back to the positional
    # name ("col<index>") when nothing usable is left, e.g. for an empty
    # header cell, so the generated SQL never contains an empty identifier.
    def column_name_for(header, index)
      # kakasi returns "ko^do" when 'コード' is given, so "^" becomes "_";
      # whitespace becomes "_" as well.
      name = romanize(header.to_s).strip.gsub(/[\^\s]/, '_')
      name.empty? ? "col#{index}" : name
    end

    # Runs the text through kakasi; already-ASCII text passes through
    # unchanged. The text is fed on stdin with a trailing newline (no `echo`
    # subprocess, so values such as "-n" are not misread as echo options).
    # When kakasi exits unsuccessfully the original text is kept instead of
    # its (empty) output.
    def romanize(text)
      converted, status = Open3.capture2(
        @kakasi_path, *KAKASI_OPTIONS,
        stdin_data: "#{text}\n"
      )
      status.success? ? converted : text
    end
  end
end
@@ -1,10 +1,10 @@
1
1
  module Muzzy
2
- module DatabaseAdapter
2
+ module DatabaseAdapters
3
3
  class AdapterBase
4
4
  def confirm_database; end
5
5
  def create_database; end
6
- def confirm_table; end
7
- def create_table; end
6
+ def confirm_table(table_name); end
7
+ def create_table(table_name, columns); end
8
8
  def import; end
9
9
  end
10
10
  end
@@ -1,5 +1,6 @@
1
+ require 'nkf'
1
2
  module Muzzy
2
- module DatabaseAdapter
3
+ module DatabaseAdapters
3
4
  class MysqlAdapter < AdapterBase
4
5
  def initialize(config, verbose: false)
5
6
  @filepath = config[:filepath]
@@ -57,8 +58,8 @@ module Muzzy
57
58
  end
58
59
 
59
60
  # [Bool] true: table created, false: some error happened
60
- def create_table(table_name, col_data_types)
61
- create_table_sql = "CREATE TABLE #{table_name} (#{col_data_types.map{|x| "#{x.name} #{x.type}"}.join(', ')})"
61
+ def create_table(table_name, columns)
62
+ create_table_sql = "CREATE TABLE #{table_name} (#{columns.map{|x| "#{x.name} #{x.datatype}"}.join(', ')})"
62
63
  create_table_cmd_list = [*mysql_cmd_list, @database_name, '-e', '"', "#{create_table_sql}", '"']
63
64
  create_table_cmd = create_table_cmd_list.join(' ')
64
65
  if @verbose
@@ -73,6 +74,7 @@ module Muzzy
73
74
  cmds = [*mysqlimport_cmd_list, @database_name, '--local', @filepath]
74
75
  cmds.push "--ignore-lines=#{option[:first_row_is_header] ? 1 : 0}"
75
76
  cmds.push('--fields_enclosed_by="')
77
+ cmds.push("--lines-terminated-by=#{lines_terminated_by}")
76
78
 
77
79
  if option[:fields_terminated_by]
78
80
  cmds.push("--fields_terminated_by=#{option[:fields_terminated_by]}")
@@ -91,6 +93,17 @@ module Muzzy
91
93
 
92
94
  private
93
95
 
96
# Detects which line terminator the file at @filepath uses, judged by the
# first terminator found in the file.
#
# The file is inspected directly in binary mode rather than by parsing the
# output of the external `file` command, so detection does not depend on that
# tool being installed or on the wording of its output.
#
# The result is the escape sequence as literal text (a backslash followed by
# a letter, hence the single quotes), ready to be appended to the
# --lines-terminated-by option; presumably mysqlimport interprets the escape.
#
# @return [String] '\r\n' for CRLF, '\r' for CR, otherwise '\n'
#   (also '\n' for empty or unreadable files)
def lines_terminated_by
  File.open(@filepath, 'rb') do |file|
    # Read up to the first LF; the limit bounds memory for files without LF.
    first_line = file.gets("\n", 65_536).to_s
    cr_index = first_line.index("\r")
    if cr_index.nil?
      '\n'
    else
      # When the CR is the last byte read, peek one more byte so a CRLF pair
      # split by the read limit is still recognised.
      after_cr = first_line[cr_index + 1] || file.read(1)
      after_cr == "\n" ? '\r\n' : '\r'
    end
  end
rescue SystemCallError
  # An unreadable file keeps the previous default.
  '\n'
end
106
+
94
107
  def mysql_cmd_list
95
108
  return @mysql_cmd_list if defined?(@mysql_cmd_list)
96
109
  @mysql_cmd_list = [@cmd_path] + common_param
@@ -5,7 +5,7 @@ module Muzzy
5
5
  # nil: could not detect
6
6
  def self.detect(rows)
7
7
  first_row, second_row = rows || []
8
- return nil if first_row.empty?
8
+ return nil if first_row.nil? || first_row.empty?
9
9
 
10
10
  if first_row.any?{|str| str.to_s.match(/_id/i) }
11
11
  return true
@@ -15,15 +15,17 @@ module Muzzy
15
15
  return false
16
16
  end
17
17
 
18
- return nil if second_row.empty?
18
+ return nil if second_row.nil? || second_row.empty?
19
19
 
20
20
  # I can't detect first_row is header or not, so guess now.
21
21
 
22
- # general header row is not contain numbers
22
+ # in most cases a header row does not contain numbers
23
23
  first_row_number_count = first_row.select{|str| str.to_f > 0}.length
24
- return false if first_row_number_count > 0
24
+ if first_row_number_count > 0
25
+ return false
26
+ end
25
27
 
26
- # If number col count is different, I guess first_row is header.
28
+ # If the first and second rows contain a different number of numeric cells, first_row is a header.
27
29
  if first_row_number_count != second_row.select{|x| x.to_f > 0}.count
28
30
  return true
29
31
  end
@@ -1,19 +1,29 @@
1
1
  require 'csv'
2
+ require 'nkf'
2
3
  module Muzzy
3
4
  class Util
4
5
  def self.fetch_header_and_first_row(filepath, col_sep)
5
6
  raise ArgumentError, "filepath required" if filepath.nil?
6
7
  raise ArgumentError, "not found file" unless File.exists?(filepath)
7
8
  header_row, first_row = nil, nil
8
- CSV.foreach(filepath, col_sep: col_sep).each_with_index do |row, i|
9
- if i == 0
10
- header_row = row
11
- elsif i == 1
12
- first_row = row
13
- else
14
- break
9
+
10
+ from_fenc = 'UTF-8'
11
+ File.open(filepath, "rt") do |f|
12
+ s = f.readlines
13
+ from_fenc = NKF.guess(s.join).to_s
14
+ end
15
+ open(filepath, "rb:#{from_fenc}:UTF-8", undef: :replace) do |f|
16
+ CSV.new(f, col_sep: col_sep).each.with_index do |row, i|
17
+ if i == 0
18
+ header_row = row
19
+ elsif i == 1
20
+ first_row = row
21
+ else
22
+ break
23
+ end
15
24
  end
16
25
  end
26
+
17
27
  return [header_row, first_row]
18
28
  rescue ArgumentError => e
19
29
  raise e
@@ -1,3 +1,3 @@
1
1
  module Muzzy
2
- VERSION = "0.1.13"
2
+ VERSION = "0.1.14"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: muzzy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.13
4
+ version: 0.1.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - vimtaku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-09-12 00:00:00.000000000 Z
11
+ date: 2018-09-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -90,6 +90,8 @@ files:
90
90
  - exe/muzzy
91
91
  - exe/muzzy_setup
92
92
  - lib/muzzy.rb
93
+ - lib/muzzy/column.rb
94
+ - lib/muzzy/columns_generator.rb
93
95
  - lib/muzzy/database_adapters/adapter_base.rb
94
96
  - lib/muzzy/database_adapters/mysql_adapter.rb
95
97
  - lib/muzzy/filetype_detector.rb