table_analysis 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +52 -13
- data/demo.rb +2 -2
- data/file/demo3.html +20 -0
- data/lib/table_analysis.rb +4 -4
- data/lib/table_analysis/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b44fee38e40791fbeb49d70162cdc145dcec4bfa69e7afd0cd04607125db127d
|
4
|
+
data.tar.gz: 98e7cd78b3396472e59c7f9a809d307a498b47d78ae3b4830eb7a3ca5bc473c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5cd192242b6b24ad949c2fde867d4cf44a0030b9f96d253ff2d8b73f6deb45f72ff705d15a0b568413297c208cc8b2bebfa96dd1c9b3b0d4c89df72a02e93968
|
7
|
+
data.tar.gz: 01e6ff00318c769ff19a95c45b1ffac42d90f27381a8e2c596ecbaf1b316ded0b92930a09657071c9d077af5669efdd1f350c6247c52983e6e8ecd7084c06d16
|
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# TableAnalysis
|
2
2
|
|
3
|
-
Welcome
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
3
|
+
Welcome!
|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
@@ -22,24 +20,65 @@ Or install it yourself as:
|
|
22
20
|
|
23
21
|
## Usage
|
24
22
|
|
23
|
+
```
|
25
24
|
doc_table_html = File.read('file/demo1.html')
|
26
25
|
|
27
26
|
TableAnalysis::Main.generator(doc_table_html, header_start_row, selected_rows)
|
28
|
-
|
29
|
-
|
27
|
+
```
|
28
|
+
|
29
|
+
> header start row, selected rows 都是从1开始
|
30
|
+
>> header start row: 找到header的tr出现在table的第几行,如果标题有多行,取第一行行号
|
30
31
|
|
31
|
-
|
32
|
+
>> selected rows: 取第几列数据,数这一行的colspan
|
33
|
+
>>
|
34
|
+
|
35
|
+
## 举例
|
36
|
+
|
37
|
+
```
|
38
|
+
// doc
|
39
|
+
<table border="1">
|
40
|
+
<tr>
|
41
|
+
<td rowspan="2">月份</td>
|
42
|
+
<td colspan='2'>开销</td>
|
43
|
+
</tr>
|
44
|
+
<tr>
|
45
|
+
<td>生活</td>
|
46
|
+
<td>工作</td>
|
47
|
+
</tr>
|
48
|
+
<tr>
|
49
|
+
<td>1</td>
|
50
|
+
<td>$70</td>
|
51
|
+
<td>$100</td>
|
52
|
+
</tr>
|
53
|
+
<tr>
|
54
|
+
<td>2</td>
|
55
|
+
<td>$100</td>
|
56
|
+
<td>$80</td>
|
57
|
+
</tr>
|
58
|
+
</table>
|
59
|
+
```
|
60
|
+
取《开销》中《生活》列
|
32
61
|
|
33
|
-
|
62
|
+
headerStartRow: 1
|
34
63
|
|
35
|
-
|
64
|
+
>通常的值都是1,如果表中有脏数据,导致标题不在第一个tr中的时候,我们对应做修改。
|
36
65
|
|
37
|
-
|
66
|
+
selectedRows: [2]
|
38
67
|
|
39
|
-
|
68
|
+
>整个table有三列,第一列月份,第二列和第三列都是开销,生活数据是第二列。
|
69
|
+
|
70
|
+
|
71
|
+
```
|
72
|
+
p TableAnalysis::Main.generator(doc, 1, 2)
|
73
|
+
```
|
74
|
+
|
75
|
+
```
|
76
|
+
返回结果 [[0, 1, -1], [-1, 1, 0], [0, 1, 0], [0, 1, 0]]
|
77
|
+
```
|
40
78
|
|
41
|
-
|
79
|
+
返回值是整个表结构,1代表要取的结果,-1代表被占用, 0代表无用数据。
|
42
80
|
|
43
|
-
|
81
|
+
当去掉-1的值后,第一行有两列数据,第二行两列,第三行3列,第四行3列,就跟html的结构一样了,
|
82
|
+
迭代表结构,根据我们获得的结果中1的位置,就知道哪些数据是有用的数据了。
|
44
83
|
|
45
|
-
|
84
|
+
以后的版本里,我会提供一个方法,把返回结果直接用数据+值的结构返回。类似[{ '0':'月份', '1': '开销', '-1': '开销'}, ...], 再提供.values返回包含1和-1的结果,0对应的值返回空字符串。
|
data/demo.rb
CHANGED
data/file/demo3.html
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
<table border="1">
|
2
|
+
<tr>
|
3
|
+
<th rowspan="2">月份</th>
|
4
|
+
<th colspan='2'>开销</th>
|
5
|
+
</tr>
|
6
|
+
<tr>
|
7
|
+
<th>生活</th>
|
8
|
+
<th>工作</th>
|
9
|
+
</tr>
|
10
|
+
<tr>
|
11
|
+
<td>1</td>
|
12
|
+
<td>$70</td>
|
13
|
+
<td>$100</td>
|
14
|
+
</tr>
|
15
|
+
<tr>
|
16
|
+
<td>2</td>
|
17
|
+
<td>$100</td>
|
18
|
+
<td>$80</td>
|
19
|
+
</tr>
|
20
|
+
</table>
|
data/lib/table_analysis.rb
CHANGED
@@ -28,13 +28,13 @@ module TableAnalysis
|
|
28
28
|
select_table_tr.each_with_index do |tr, tr_index|
|
29
29
|
if tr_index < header_start_row.to_i - 1
|
30
30
|
upheader_tr_size += 1
|
31
|
-
tr.xpath('./td').each_with_index do |td, td_index|
|
31
|
+
tr.xpath('./td|./th').each_with_index do |td, td_index|
|
32
32
|
rowspan = td.attribute('rowspan')&.value
|
33
33
|
colspan = td.attribute('colspan')&.value
|
34
34
|
upheader_content_tds << [rowspan, colspan]
|
35
35
|
end
|
36
36
|
elsif tr_index == header_start_row.to_i - 1
|
37
|
-
tr.xpath('./td').each do |td|
|
37
|
+
tr.xpath('./td|./th').each do |td|
|
38
38
|
colspan = td.attribute('colspan')&.value
|
39
39
|
rowspan = td.attribute('rowspan')&.value
|
40
40
|
header_content_tds << [rowspan, colspan]
|
@@ -42,14 +42,14 @@ module TableAnalysis
|
|
42
42
|
tr_rows = rowspan.to_i.dup if !rowspan.nil? && rowspan.to_i > 1 && tr_rows < rowspan.to_i
|
43
43
|
end
|
44
44
|
elsif tr_index > header_start_row.to_i - 1 && tr_index < header_start_row.to_i - 1 + tr_rows
|
45
|
-
tr.xpath('./td').each do |td|
|
45
|
+
tr.xpath('./td|./th').each do |td|
|
46
46
|
rowspan = td.attribute('rowspan')&.value
|
47
47
|
colspan = td.attribute('colspan')&.value
|
48
48
|
header_body_content_tds << [rowspan, colspan]
|
49
49
|
end
|
50
50
|
elsif tr_index >= header_start_row.to_i - 1 + tr_rows
|
51
51
|
body_tr_size += 1
|
52
|
-
tr.xpath('./td').each_with_index do |td, td_index|
|
52
|
+
tr.xpath('./td|./th').each_with_index do |td, td_index|
|
53
53
|
rowspan = td.attribute('rowspan')&.value
|
54
54
|
colspan = td.attribute('colspan')&.value
|
55
55
|
body_content_tds << [rowspan, colspan]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_analysis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.7
|
4
|
+
version: 0.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mico_xiaozhen@sina.com
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-06-
|
11
|
+
date: 2019-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -87,6 +87,7 @@ files:
|
|
87
87
|
- demo.rb
|
88
88
|
- file/demo1.html
|
89
89
|
- file/demo2.html
|
90
|
+
- file/demo3.html
|
90
91
|
- lib/table_analysis.rb
|
91
92
|
- lib/table_analysis/body.rb
|
92
93
|
- lib/table_analysis/core.rb
|