easy_sheet_io 0.1.4 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 791e3662406be26d6afb90f394f0d585b7c9e956d4cdc200db484be820af4eb8
4
- data.tar.gz: 7796eacac61646ec19c3d452b9ff738de37546b2a7360374a9f9d5fd9fabda8a
3
+ metadata.gz: 6de67291cc58a0ab7ea017efd79e847351f71e6790f5f2e4e3fb4462c2ff5f47
4
+ data.tar.gz: aa0dca360cfa89666b291d8ddeea149399068fbf57f12c422e615c5da03ac71e
5
5
  SHA512:
6
- metadata.gz: 991506127b66bfdd211f99ec00554dcca3ad521274d3c712389d3f346cf35f4d0b1c5e0478817f9ead1a915aa89c34a88abf49502ee054d30a41a02f19068510
7
- data.tar.gz: 6eb892fa842a1a522e939c432e66c0aef9384b37b4bae3b454bb9a82628a2dbd3b3c1cb3d13192b71cf915a7b886858fbb31331b5fa6f03a148baacefd5a611b
6
+ metadata.gz: 6cadf3040934873c1a9182000d6b90a9fbad20056e1bd0c17e2fe21929643a2a438832e4fdcdb5519b1def845925526cc3c4df460346affe397a8c05bddf95ce
7
+ data.tar.gz: 3d7df3a901737bdca5ee7cf8e9399b2d958d51134328ea5ee33ac5e17126847c54ff3e20617ad26b073029afbc790e31e10bef66d18b2d74429f36db417ddef9
data/Gemfile CHANGED
@@ -16,3 +16,4 @@ gem "rover-df"
16
16
  gem "smarter_csv"
17
17
  gem "roo-xls"
18
18
  gem "spreadsheet"
19
+ gem "ast"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- easy_sheet_io (0.1.3)
4
+ easy_sheet_io (0.2.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -72,6 +72,7 @@ PLATFORMS
72
72
  x86_64-linux
73
73
 
74
74
  DEPENDENCIES
75
+ ast
75
76
  daru
76
77
  easy_sheet_io!
77
78
  rake (~> 13.0)
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.email = ["shun_yamaguchi_tc@live.jp"]
10
10
 
11
11
  spec.summary = "A simple way to Open .csv, .xls, .xlsx files."
12
- spec.description = "A simple way to Open .csv, .xls, .xlsx files. You can convert it to 2D Array, Hash, Dataframe."
12
+ spec.description = "A simple way to Open .csv, .xls, .xlsx files. You can convert it to 2D array, hash, data frame."
13
13
  spec.homepage = "https://github.com/show-o-atakun/easy_sheet_io"
14
14
  spec.license = "MIT"
15
15
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasySheetIo
4
- VERSION = "0.1.4"
4
+ VERSION = "0.2.2"
5
5
  end
data/lib/easy_sheet_io.rb CHANGED
@@ -17,22 +17,29 @@ module EasySheetIo
17
17
  return /csv$/ === path ? read_csv(path, **opt) : read_excel(path, **opt)
18
18
  end
19
19
 
20
- # ##Generate DF from CSV File
20
+ # ##Generate Array from CSV File, and convert it to Hash or DataFrame.
21
21
  # **opt candidate= line_from: 1, header: 0
22
- def read_csv(path, format: nil, encoding: "utf-8", **opt)
23
- csv = CSV.parse(File.open path, encoding: encoding, &:read) # Get 2D Array
22
+ def read_csv(path, format: nil, encoding: "utf-8", col_sep: ",", **opt)
23
+ # Get 2D Array
24
+ begin
25
+ csv = CSV.parse(File.open(path, encoding: encoding, &:read), col_sep: col_sep)
26
+ rescue Encoding::InvalidByteSequenceError
27
+ # Try Another Encoding
28
+ puts "Fail Encoding #{encoding}. Trying cp932..."
29
+ csv = CSV.parse(File.open(path, encoding: "cp932", &:read), col_sep: col_sep)
30
+ end
31
+
24
32
  return csv if format.nil?
25
33
 
34
+ # Convert Hash or DataFrame
26
35
  ans = to_hash(csv, **opt)
27
36
  return format==:hash || format=="hash" ? ans : to_df(ans, format: format)
28
37
  end
29
38
 
30
- # ##Generate DF from Excel File
39
+ # ##Generate Array from EXCEL File, and convert it to Hash or DataFrame.
31
40
  # **opt candidate= line_from: 1, header: 0
32
- # !encoding parameter is not allowed yet
33
- # !(Finally, I want to make it automatically recognize encoding of file).
34
- def read_excel(path, sheet_i: 0, format: nil, **opt)
35
- a2d = open_excel(path, sheet_i) # Get 2D Array
41
+ def read_excel(path, sheet_i: 0, format: nil, encoding: "utf-8", **opt)
42
+ a2d = open_excel(path, sheet_i, encoding: encoding) # Get 2D Array
36
43
  return a2d if format.nil?
37
44
 
38
45
  ans = to_hash(a2d, **opt)
@@ -41,11 +48,31 @@ module EasySheetIo
41
48
 
42
49
  # Convert 2d Array to Hash
43
50
  # ##header: nil -> Default Headers(:column1, column2,...) are generated.
44
- def to_hash(array2d, line_from: 1, line_until: -1, header: 0)
45
- output = array2d[line_from..line_until]
46
- hd = header.nil? ? [*0...(output.longest_line)].map{"column#{_1}"} : array2d[header]
47
- output_transpose = output[0].zip(*output[1..])
51
+ # line_until=nil means the data are picked up until the end line.
52
+ def to_hash(array2d, line_from: 1, line_until: nil, header: 0, symbol_header: false)
53
+
54
+ # Define Read Range------------
55
+ lfrom, luntil = line_from, line_until
56
+ lf_reg, lu_reg = line_from.kind_of?(Regexp), line_until.kind_of?(Regexp)
48
57
 
58
+ if lf_reg || lu_reg
59
+ lines_ary = array2d.map{ _1.join "," }
60
+ lfrom = lines_ary.find_index{ line_from === _1 } if lf_reg
61
+ luntil = (lines_ary.length-1) - lines_ary.reverse.find_index{ line_until === _1 } if lu_reg
62
+ end
63
+ # -----------------------------
64
+
65
+ # Define Data Array------------
66
+ output = array2d[lfrom...luntil]
67
+ output_transpose = output[0].zip(*output[1..])
68
+ # -----------------------------
69
+
70
+ # Define Header----------------
71
+ hd = header.nil? ? [*0...(output.longest_line)].map{"column#{_1}"} : check_header(array2d[header])
72
+ hd = hd.map { _1.intern } if symbol_header
73
+ # -----------------------------
74
+
75
+ # Make Hash(Header => Data Array)
49
76
  return hd.each_with_object({}).with_index {|(hdr, hash), i| hash[hdr]=output_transpose[i]}
50
77
  end
51
78
 
@@ -58,19 +85,27 @@ module EasySheetIo
58
85
  end
59
86
  end
60
87
 
61
- # ##Genarate Hash from excel file
62
- def open_excel(path, sheet_i)
63
- begin
64
- book = /xlsx$/ === path ? Roo::Excelx.new(path) : Roo::Excel.new(path)
88
+ # ##Generate Array from excel file
89
+ def open_excel(path, sheet_i, encoding: "utf-8")
90
+ if /xlsx$/ === path
91
+ puts "Sorry, encoding option is not supported yet for xlsx file." if encoding != "utf-8"
92
+
93
+ book = Roo::Excelx.new(path)
65
94
  s = book.sheet(sheet_i)
66
95
 
67
- ## bottole neck===
96
+ ## bottleneck
68
97
  return s.to_a
69
-
70
- rescue Encoding::InvalidByteSequenceError
71
-
72
- Spreadsheet.client_encoding="Windows-31J"
73
- ss = Spreadsheet.open(path)
98
+
99
+ # xls
100
+ else
101
+ begin
102
+ Spreadsheet.client_encoding = encoding
103
+ ss = Spreadsheet.open(path)
104
+ rescue Encoding::InvalidByteSequenceError
105
+ puts "Fail Encoding #{encoding}. Trying Windows-31J..."
106
+ Spreadsheet.client_encoding = "Windows-31J"
107
+ ss = Spreadsheet.open(path)
108
+ end
74
109
 
75
110
  a2d = []
76
111
  ss.worksheets[sheet_i].rows.each do |row|
@@ -81,6 +116,28 @@ module EasySheetIo
81
116
 
82
117
  return a2d
83
118
  end
119
+ end
120
+
121
+ # Fix blank or duplicated header
122
+ def check_header(header_array)
123
+ ans = header_array.map.with_index do |item, i|
124
+ if item.nil?
125
+ "column#{i}"
126
+ elsif item.kind_of?(String)
127
+ /^\s*$/ === item ? "column#{i}" : item.gsub(/\s+/, "")
128
+ else
129
+ item
130
+ end
131
+ end
132
+
133
+ dup_check = (0...(header_array.length)).group_by {|i| ans[i]}
134
+ dup_check.each do |item, i_s|
135
+ if i_s.length > 1
136
+ i_s.each_with_index {|i, index_in_i_s| ans[i] = "#{ans[i]}_#{index_in_i_s}"}
137
+ end
138
+ end
84
139
 
140
+ return ans
85
141
  end
142
+
86
143
  end
metadata CHANGED
@@ -1,17 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_sheet_io
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - show-o-atakun
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-03-22 00:00:00.000000000 Z
11
+ date: 2022-03-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A simple way to Open .csv, .xls, .xlsx files. You can convert it to 2D
14
- Array, Hash, Dataframe.
14
+ array, hash, data frame.
15
15
  email:
16
16
  - shun_yamaguchi_tc@live.jp
17
17
  executables: []