table_analysis 0.2.7 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +52 -13
- data/demo.rb +2 -2
- data/file/demo3.html +20 -0
- data/lib/table_analysis.rb +4 -4
- data/lib/table_analysis/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b44fee38e40791fbeb49d70162cdc145dcec4bfa69e7afd0cd04607125db127d
|
4
|
+
data.tar.gz: 98e7cd78b3396472e59c7f9a809d307a498b47d78ae3b4830eb7a3ca5bc473c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5cd192242b6b24ad949c2fde867d4cf44a0030b9f96d253ff2d8b73f6deb45f72ff705d15a0b568413297c208cc8b2bebfa96dd1c9b3b0d4c89df72a02e93968
|
7
|
+
data.tar.gz: 01e6ff00318c769ff19a95c45b1ffac42d90f27381a8e2c596ecbaf1b316ded0b92930a09657071c9d077af5669efdd1f350c6247c52983e6e8ecd7084c06d16
|
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# TableAnalysis
|
2
2
|
|
3
|
-
Welcome
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
3
|
+
Welcome!
|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
@@ -22,24 +20,65 @@ Or install it yourself as:
|
|
22
20
|
|
23
21
|
## Usage
|
24
22
|
|
23
|
+
```
|
25
24
|
doc_table_html = File.read('file/demo1.html')
|
26
25
|
|
27
26
|
TableAnalysis::Main.generator(doc_table_html, header_start_row, selected_rows)
|
28
|
-
|
29
|
-
|
27
|
+
```
|
28
|
+
|
29
|
+
> header start row 和 selected rows 都是从1开始计数
|
30
|
+
>> header start row: 找到header的tr出现在table的第几行,如果标题有多行,取第一行行号
|
30
31
|
|
31
|
-
|
32
|
+
>> selected rows: 要取第几列的数据;数列号时要把这一行各单元格的colspan计算在内
|
33
|
+
>>
|
34
|
+
|
35
|
+
## 举例
|
36
|
+
|
37
|
+
```
|
38
|
+
// doc
|
39
|
+
<table border="1">
|
40
|
+
<tr>
|
41
|
+
<td rowspan="2">月份</td>
|
42
|
+
<td colspan='2'>开销</td>
|
43
|
+
</tr>
|
44
|
+
<tr>
|
45
|
+
<td>生活</td>
|
46
|
+
<td>工作</td>
|
47
|
+
</tr>
|
48
|
+
<tr>
|
49
|
+
<td>1</td>
|
50
|
+
<td>$70</td>
|
51
|
+
<td>$100</td>
|
52
|
+
</tr>
|
53
|
+
<tr>
|
54
|
+
<td>2</td>
|
55
|
+
<td>$100</td>
|
56
|
+
<td>$80</td>
|
57
|
+
</tr>
|
58
|
+
</table>
|
59
|
+
```
|
60
|
+
取《开销》中《生活》列
|
32
61
|
|
33
|
-
|
62
|
+
headerStartRow: 1
|
34
63
|
|
35
|
-
|
64
|
+
>通常取值为1;如果表中有脏数据,导致标题不在第一个tr中,则相应调整该值。
|
36
65
|
|
37
|
-
|
66
|
+
selectedRows: [2]
|
38
67
|
|
39
|
-
|
68
|
+
>整个table有三列,第一列月份,第二列和第三列都是开销,生活数据是第二列。
|
69
|
+
|
70
|
+
|
71
|
+
```
|
72
|
+
p TableAnalysis::Main.generator(doc, 1, 2)
|
73
|
+
```
|
74
|
+
|
75
|
+
```
|
76
|
+
返回结果 [[0, 1, -1], [-1, 1, 0], [0, 1, 0], [0, 1, 0]]
|
77
|
+
```
|
40
78
|
|
41
|
-
|
79
|
+
返回值是整个表结构:1代表要取的结果,-1代表该位置被其他单元格的rowspan/colspan占用,0代表无用数据。
|
42
80
|
|
43
|
-
|
81
|
+
当去掉-1的值后,第一行有两列数据,第二行两列,第三行3列,第四行3列,就跟html的结构一样了,
|
82
|
+
迭代表结构,根据我们获得的结果中1的位置,就知道哪些数据是有用的数据了。
|
44
83
|
|
45
|
-
|
84
|
+
以后的版本里,我会提供一个方法,把返回结果直接用数据+值的结构返回。类似[{ '0':'月份', '1': '开销', '-1': '开销'}, ...], 再提供.values返回包含1和-1的结果,0对应的值返回空字符串。
|
data/demo.rb
CHANGED
data/file/demo3.html
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
<table border="1">
|
2
|
+
<tr>
|
3
|
+
<th rowspan="2">月份</th>
|
4
|
+
<th colspan='2'>开销</th>
|
5
|
+
</tr>
|
6
|
+
<tr>
|
7
|
+
<th>生活</th>
|
8
|
+
<th>工作</th>
|
9
|
+
</tr>
|
10
|
+
<tr>
|
11
|
+
<td>1</td>
|
12
|
+
<td>$70</td>
|
13
|
+
<td>$100</td>
|
14
|
+
</tr>
|
15
|
+
<tr>
|
16
|
+
<td>2</td>
|
17
|
+
<td>$100</td>
|
18
|
+
<td>$80</td>
|
19
|
+
</tr>
|
20
|
+
</table>
|
data/lib/table_analysis.rb
CHANGED
@@ -28,13 +28,13 @@ module TableAnalysis
|
|
28
28
|
select_table_tr.each_with_index do |tr, tr_index|
|
29
29
|
if tr_index < header_start_row.to_i - 1
|
30
30
|
upheader_tr_size += 1
|
31
|
-
tr.xpath('./td').each_with_index do |td, td_index|
|
31
|
+
tr.xpath('./td|./th').each_with_index do |td, td_index|
|
32
32
|
rowspan = td.attribute('rowspan')&.value
|
33
33
|
colspan = td.attribute('colspan')&.value
|
34
34
|
upheader_content_tds << [rowspan, colspan]
|
35
35
|
end
|
36
36
|
elsif tr_index == header_start_row.to_i - 1
|
37
|
-
tr.xpath('./td').each do |td|
|
37
|
+
tr.xpath('./td|./th').each do |td|
|
38
38
|
colspan = td.attribute('colspan')&.value
|
39
39
|
rowspan = td.attribute('rowspan')&.value
|
40
40
|
header_content_tds << [rowspan, colspan]
|
@@ -42,14 +42,14 @@ module TableAnalysis
|
|
42
42
|
tr_rows = rowspan.to_i.dup if !rowspan.nil? && rowspan.to_i > 1 && tr_rows < rowspan.to_i
|
43
43
|
end
|
44
44
|
elsif tr_index > header_start_row.to_i - 1 && tr_index < header_start_row.to_i - 1 + tr_rows
|
45
|
-
tr.xpath('./td').each do |td|
|
45
|
+
tr.xpath('./td|./th').each do |td|
|
46
46
|
rowspan = td.attribute('rowspan')&.value
|
47
47
|
colspan = td.attribute('colspan')&.value
|
48
48
|
header_body_content_tds << [rowspan, colspan]
|
49
49
|
end
|
50
50
|
elsif tr_index >= header_start_row.to_i - 1 + tr_rows
|
51
51
|
body_tr_size += 1
|
52
|
-
tr.xpath('./td').each_with_index do |td, td_index|
|
52
|
+
tr.xpath('./td|./th').each_with_index do |td, td_index|
|
53
53
|
rowspan = td.attribute('rowspan')&.value
|
54
54
|
colspan = td.attribute('colspan')&.value
|
55
55
|
body_content_tds << [rowspan, colspan]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_analysis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.7
|
4
|
+
version: 0.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mico_xiaozhen@sina.com
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-06-
|
11
|
+
date: 2019-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -87,6 +87,7 @@ files:
|
|
87
87
|
- demo.rb
|
88
88
|
- file/demo1.html
|
89
89
|
- file/demo2.html
|
90
|
+
- file/demo3.html
|
90
91
|
- lib/table_analysis.rb
|
91
92
|
- lib/table_analysis/body.rb
|
92
93
|
- lib/table_analysis/core.rb
|