easy_sheet_io 0.1.4 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 791e3662406be26d6afb90f394f0d585b7c9e956d4cdc200db484be820af4eb8
4
- data.tar.gz: 7796eacac61646ec19c3d452b9ff738de37546b2a7360374a9f9d5fd9fabda8a
3
+ metadata.gz: 6de67291cc58a0ab7ea017efd79e847351f71e6790f5f2e4e3fb4462c2ff5f47
4
+ data.tar.gz: aa0dca360cfa89666b291d8ddeea149399068fbf57f12c422e615c5da03ac71e
5
5
  SHA512:
6
- metadata.gz: 991506127b66bfdd211f99ec00554dcca3ad521274d3c712389d3f346cf35f4d0b1c5e0478817f9ead1a915aa89c34a88abf49502ee054d30a41a02f19068510
7
- data.tar.gz: 6eb892fa842a1a522e939c432e66c0aef9384b37b4bae3b454bb9a82628a2dbd3b3c1cb3d13192b71cf915a7b886858fbb31331b5fa6f03a148baacefd5a611b
6
+ metadata.gz: 6cadf3040934873c1a9182000d6b90a9fbad20056e1bd0c17e2fe21929643a2a438832e4fdcdb5519b1def845925526cc3c4df460346affe397a8c05bddf95ce
7
+ data.tar.gz: 3d7df3a901737bdca5ee7cf8e9399b2d958d51134328ea5ee33ac5e17126847c54ff3e20617ad26b073029afbc790e31e10bef66d18b2d74429f36db417ddef9
data/Gemfile CHANGED
@@ -16,3 +16,4 @@ gem "rover-df"
16
16
  gem "smarter_csv"
17
17
  gem "roo-xls"
18
18
  gem "spreadsheet"
19
+ gem "ast"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- easy_sheet_io (0.1.3)
4
+ easy_sheet_io (0.2.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -72,6 +72,7 @@ PLATFORMS
72
72
  x86_64-linux
73
73
 
74
74
  DEPENDENCIES
75
+ ast
75
76
  daru
76
77
  easy_sheet_io!
77
78
  rake (~> 13.0)
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.email = ["shun_yamaguchi_tc@live.jp"]
10
10
 
11
11
  spec.summary = "A simple way to Open .csv, .xls, .xlsx files."
12
- spec.description = "A simple way to Open .csv, .xls, .xlsx files. You can convert it to 2D Array, Hash, Dataframe."
12
+ spec.description = "A simple way to Open .csv, .xls, .xlsx files. You can convert it to 2D array, hash, data frame."
13
13
  spec.homepage = "https://github.com/show-o-atakun/easy_sheet_io"
14
14
  spec.license = "MIT"
15
15
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasySheetIo
4
- VERSION = "0.1.4"
4
+ VERSION = "0.2.2"
5
5
  end
data/lib/easy_sheet_io.rb CHANGED
@@ -17,22 +17,29 @@ module EasySheetIo
17
17
  return /csv$/ === path ? read_csv(path, **opt) : read_excel(path, **opt)
18
18
  end
19
19
 
20
- # ##Generate DF from CSV File
20
+ # ##Generate Array from CSV File, and convert it to Hash or DataFrame.
21
21
  # **opt candidate= line_from: 1, header: 0
22
- def read_csv(path, format: nil, encoding: "utf-8", **opt)
23
- csv = CSV.parse(File.open path, encoding: encoding, &:read) # Get 2D Array
22
+ def read_csv(path, format: nil, encoding: "utf-8", col_sep: ",", **opt)
23
+ # Get 2D Array
24
+ begin
25
+ csv = CSV.parse(File.open(path, encoding: encoding, &:read), col_sep: col_sep)
26
+ rescue Encoding::InvalidByteSequenceError
27
+ # Try Another Encoding
28
+ puts "Fail Encoding #{encoding}. Trying cp932..."
29
+ csv = CSV.parse(File.open(path, encoding: "cp932", &:read), col_sep: col_sep)
30
+ end
31
+
24
32
  return csv if format.nil?
25
33
 
34
+ # Convert to Hash or DataFrame
26
35
  ans = to_hash(csv, **opt)
27
36
  return format==:hash || format=="hash" ? ans : to_df(ans, format: format)
28
37
  end
29
38
 
30
- # ##Generate DF from Excel File
39
+ # ##Generate Array from EXCEL File, and convert it to Hash or DataFrame.
31
40
  # **opt candidate= line_from: 1, header: 0)
32
- # !encoding parameter is not allowed yet
33
- # !(Finally, I want to make it automatically recognize encoding of file).
34
- def read_excel(path, sheet_i: 0, format: nil, **opt)
35
- a2d = open_excel(path, sheet_i) # Get 2D Array
41
+ def read_excel(path, sheet_i: 0, format: nil, encoding: "utf-8", **opt)
42
+ a2d = open_excel(path, sheet_i, encoding: encoding) # Get 2D Array
36
43
  return a2d if format.nil?
37
44
 
38
45
  ans = to_hash(a2d, **opt)
@@ -41,11 +48,31 @@ module EasySheetIo
41
48
 
42
49
  # Convert 2d Array to Hash
43
50
  # ##header: nil -> Default Headers(:column1, column2,...) are generated.
44
- def to_hash(array2d, line_from: 1, line_until: -1, header: 0)
45
- output = array2d[line_from..line_until]
46
- hd = header.nil? ? [*0...(output.longest_line)].map{"column#{_1}"} : array2d[header]
47
- output_transpose = output[0].zip(*output[1..])
51
+ # line_until=nil means the data are picked up until the end line.
52
+ def to_hash(array2d, line_from: 1, line_until: nil, header: 0, symbol_header: false)
53
+
54
+ # Define Read Range------------
55
+ lfrom, luntil = line_from, line_until
56
+ lf_reg, lu_reg = line_from.kind_of?(Regexp), line_until.kind_of?(Regexp)
48
57
 
58
+ if lf_reg || lu_reg
59
+ lines_ary = array2d.map{ _1.join "," }
60
+ lfrom = lines_ary.find_index{ line_from === _1 } if lf_reg
61
+ luntil = (lines_ary.length-1) - lines_ary.reverse.find_index{ line_until === _1 } if lu_reg
62
+ end
63
+ # -----------------------------
64
+
65
+ # Define Data Array------------
66
+ output = array2d[lfrom...luntil]
67
+ output_transpose = output[0].zip(*output[1..])
68
+ # -----------------------------
69
+
70
+ # Define Header----------------
71
+ hd = header.nil? ? [*0...(output.longest_line)].map{"column#{_1}"} : check_header(array2d[header])
72
+ hd = hd.map { _1.intern } if symbol_header
73
+ # -----------------------------
74
+
75
+ # Make Hash(Header => Data Array)
49
76
  return hd.each_with_object({}).with_index {|(hdr, hash), i| hash[hdr]=output_transpose[i]}
50
77
  end
51
78
 
@@ -58,19 +85,27 @@ module EasySheetIo
58
85
  end
59
86
  end
60
87
 
61
- # ##Genarate Hash from excel file
62
- def open_excel(path, sheet_i)
63
- begin
64
- book = /xlsx$/ === path ? Roo::Excelx.new(path) : Roo::Excel.new(path)
88
+ # ##Generate Array from excel file
89
+ def open_excel(path, sheet_i, encoding: "utf-8")
90
+ if /xlsx$/ === path
91
+ puts "Sorry, encoding option is not supported yet for xlsx file." if encoding != "utf-8"
92
+
93
+ book = Roo::Excelx.new(path)
65
94
  s = book.sheet(sheet_i)
66
95
 
67
- ## bottole neck===
96
+ ## bottleneck
68
97
  return s.to_a
69
-
70
- rescue Encoding::InvalidByteSequenceError
71
-
72
- Spreadsheet.client_encoding="Windows-31J"
73
- ss = Spreadsheet.open(path)
98
+
99
+ # xls
100
+ else
101
+ begin
102
+ Spreadsheet.client_encoding = encoding
103
+ ss = Spreadsheet.open(path)
104
+ rescue Encoding::InvalidByteSequenceError
105
+ puts "Fail Encoding #{encoding}. Trying Windows-31J..."
106
+ Spreadsheet.client_encoding = "Windows-31J"
107
+ ss = Spreadsheet.open(path)
108
+ end
74
109
 
75
110
  a2d = []
76
111
  ss.worksheets[sheet_i].rows.each do |row|
@@ -81,6 +116,28 @@ module EasySheetIo
81
116
 
82
117
  return a2d
83
118
  end
119
+ end
120
+
121
+ # Fix blank or duplicated header
122
+ def check_header(header_array)
123
+ ans = header_array.map.with_index do |item, i|
124
+ if item.nil?
125
+ "column#{i}"
126
+ elsif item.kind_of?(String)
127
+ /^\s*$/ === item ? "column#{i}" : item.gsub(/\s+/, "")
128
+ else
129
+ item
130
+ end
131
+ end
132
+
133
+ dup_check = (0...(header_array.length)).group_by {|i| ans[i]}
134
+ dup_check.each do |item, i_s|
135
+ if i_s.length > 1
136
+ i_s.each_with_index {|i, index_in_i_s| ans[i] = "#{ans[i]}_#{index_in_i_s}"}
137
+ end
138
+ end
84
139
 
140
+ return ans
85
141
  end
142
+
86
143
  end
metadata CHANGED
@@ -1,17 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_sheet_io
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - show-o-atakun
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-03-22 00:00:00.000000000 Z
11
+ date: 2022-03-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A simple way to Open .csv, .xls, .xlsx files. You can convert it to 2D
14
- Array, Hash, Dataframe.
14
+ array, hash, data frame.
15
15
  email:
16
16
  - shun_yamaguchi_tc@live.jp
17
17
  executables: []