casetdown 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CasetDown/bin/cm +73 -0
- data/CasetDown/bin/cml +41 -0
- data/CasetDown/casetable.rb +80 -0
- data/CasetDown/casetcode.rb +228 -0
- data/CasetDown/casetdoc.rb +117 -0
- data/CasetDown/casetdown.rb +13 -0
- data/CasetDown/casetter.rb +72 -0
- data/EnData/api-app.rb +89 -0
- data/EnData/endata-app.rb +62 -0
- data/EnData/endata.rb +81 -0
- data/Tabot/newtab.rb +14 -0
- data/Tabot/simtab.rb +258 -0
- data/TextUtils/text_absparser.rb +42 -0
- data/TextUtils/text_abstract.rb +84 -0
- data/TextUtils/text_mind.rb +180 -0
- data/TinText/cache.rb +24 -0
- data/TinText/tin_text.rb +44 -0
- data/TinText/tintext.rb +4 -0
- data/TinText/tum.rb +13 -0
- data/XMLUtils/XmlUtils.rb +207 -0
- metadata +61 -0
data/EnData/api-app.rb
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
#coding:utf-8
|
|
2
|
+
|
|
3
|
+
##################################################################################################################
|
|
4
|
+
# APPLICATION FOR WEBOOT API CALLING
|
|
5
|
+
##################################################################################################################
|
|
6
|
+
require 'weboot'
|
|
7
|
+
|
|
8
|
+
# Builds one command string from the body rows of a table.
#
# @param tabname [String] table name handed to EnData.get_body
# @return [String] whatever EnData.get_body returns, joined with ";"
def cmds(tabname)
  body_rows = EnData.get_body(tabname)
  body_rows.join(";")
end
|
|
11
|
+
|
|
12
|
+
# Top-level shortcut that forwards both arguments, unchanged, to
# EnData::ApiRequest.run and returns its result.
#
# @param tables [Array] passed through as the first argument
# @param bind_table [Hash] passed through as the second argument
def run(tables, bind_table)
  EnData::ApiRequest.run(tables, bind_table)
end
|
|
15
|
+
|
|
16
|
+
##################################################################################################################
|
|
17
|
+
#
|
|
18
|
+
# EnData::ApiRequest
|
|
19
|
+
#
|
|
20
|
+
# API: template1 # an example of templates
|
|
21
|
+
# API: bind # single bind
|
|
22
|
+
# API: send # single send
|
|
23
|
+
# API: make # batch bind
|
|
24
|
+
# API: handle # batch send
|
|
25
|
+
# API: run # make + handle
|
|
26
|
+
#
|
|
27
|
+
# Step1: EnData::ApiRequest.make
|
|
28
|
+
# CASE: table1 × table2 × ... >--(join)--> task_table + bind_table(ref,val) >--eval(binding)--> task_instances
|
|
29
|
+
# USE: EnData.join_table() |> EnData.make_ref |> EnData.make_doc |> EnData::ApiRequest.bind
|
|
30
|
+
#
|
|
31
|
+
# Step2: EnData::ApiRequest.handle
|
|
32
|
+
# CASE: task_instances >--(API CALLING)--> running_results
|
|
33
|
+
# USE: EnData::ApiRequest.send
|
|
34
|
+
#
|
|
35
|
+
##################################################################################################################
|
|
36
|
+
module EnData
  # Helpers that turn joined task tables into request texts (make/bind) and
  # push those texts through Weboot (handle/send). `run` chains both steps.
  module ApiRequest
    module_function

    # Example request template. Placeholders use the ${name} form.
    # The literal is kept flush-left on purpose: any leading whitespace would
    # become part of the returned text.
    #
    # @return [String] the raw template text
    def template1
      %Q|POST ${path} HTTP/1.1
Host: ${server}
Accept: */*
Content-Type: application/json

{
"protocol": "${protocol}",
"host": "${device}",
"user": "${access-key}",
"app-key": "${app-key}",
"cmds": "${operations}",
"method": "${processes}"
}|
    end

    # Single bind: registers the eight known option values (stringified) in
    # Replacement and asks TinText to instantiate the template.
    # NOTE(review): Replacement and TinText are defined outside this file;
    # presumably TinText.instance substitutes the ${...} placeholders - confirm.
    #
    # @param options [Hash] values looked up by the string keys listed below
    # @param template [String] template text, defaults to template1
    # @return whatever TinText.instance returns
    def bind(options, template = EnData::ApiRequest.template1)
      Replacement.init
      %w[path server protocol device access-key app-key operations processes].each do |key|
        Replacement[key] = options[key].to_s
      end
      TinText.instance(template)
    end

    # Single send: builds a request from the context via Weboot, executes it
    # and returns the 'content' entry of the JSON-decoded response body.
    # This shadows Object#send on the ApiRequest module itself.
    #
    # @param context request text handed to Weboot.request
    # @return the value stored under 'content' in the parsed body
    def send(context)
      request  = Weboot.request(context)
      response = Weboot.construct(request)
      JSON.parse(response['body'])['content']
    end

    # Batch bind: folds all tables together with EnData.join_table, applies
    # every (refname, reftext) pair with EnData.make_ref, converts the result
    # to documents with EnData.make_doc and binds each document.
    # Please don't join too many tables.
    #
    # @param tables [Array] table arguments for EnData.join_table; NOT modified
    # @param bind_table [Hash] refname => reftext replacements
    # @return [Array] one bound instance per document
    def make(tables, bind_table)
      pending = tables.dup # work on a copy so the caller's array is left intact
      tasks = pending.shift
      tasks = EnData.join_table(tasks, pending.shift) until pending.empty?
      bind_table.each { |refname, reftext| tasks = EnData.make_ref(tasks, refname, reftext) }
      EnData.make_doc(tasks).map { |doc| EnData::ApiRequest.bind(doc) }
    end

    # Batch send: sends every instance and collects the results.
    #
    # @param instances [Array] values accepted by ApiRequest.send
    # @return [Array] one result per instance
    def handle(instances)
      instances.map { |instance| EnData::ApiRequest.send(instance) }
    end

    # make + handle.
    def run(tables, bind_table)
      instances = EnData::ApiRequest.make(tables, bind_table)
      EnData::ApiRequest.handle(instances)
    end
  end
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#coding:utf-8
|
|
2
|
+
|
|
3
|
+
##################################################################################################################
|
|
4
|
+
#
|
|
5
|
+
# EnData
|
|
6
|
+
#
|
|
7
|
+
# CASE: table from DATA
|
|
8
|
+
# USE: EnData.table(table_name)
|
|
9
|
+
#
|
|
10
|
+
# CASE: table1 × table2
|
|
11
|
+
#       USE: EnData.join_table(table_name1, table_name2)
|
|
12
|
+
#
|
|
13
|
+
# CASE: table[head|body] >--(cut-head)--> table[body]
|
|
14
|
+
#       USE: EnData.get_body(table_name)
|
|
15
|
+
#
|
|
16
|
+
# CASE: table[head=[field*], record=[value*]] >----> tree{field=>value, ...}
|
|
17
|
+
#       USE: EnData.make_doc(table)
|
|
18
|
+
#
|
|
19
|
+
# CASE: ${refname} >--(replace)--> reftext
|
|
20
|
+
#       USE: EnData.make_ref(table, refname, reftext)
|
|
21
|
+
#
|
|
22
|
+
##################################################################################################################
|
|
23
|
+
|
|
24
|
+
module EnData
  module_function

  # Loads, parses and evaluates the embedded script registered under `name`.
  # Example section header: [CUSTOM_YOUR_NAME@endata CUSTOM_YOUR_PATH]$ruby table
  #
  # @param name [String] script name to select, 'table' by default
  # @return the result of EnData.run on the sourced script
  def table(name = 'table')
    EnData.load
    EnData.parse
    script = EnData.select(name: name)
    EnData.run(EnData.source(script))
  end

  # Loads two named tables, binds each one's first row as its header and
  # returns the result of calling 拼接 on the first with the second.
  def join_table(name1, name2)
    left  = EnData.table(name1)
    right = EnData.table(name2)
    left.绑定表头(left.first)
    right.绑定表头(right.first)
    left.拼接(right)
  end

  # Returns the named table without its first (header) row.
  def get_body(name)
    whole = EnData.table(name)
    whole[1..-1]
  end

  # Converts a [[header], [record], ...] table into one Hash per record,
  # keyed by the header cell at the same position.
  #
  # @param table [Array<Array>] header row followed by records
  # @return [Array<Hash>]
  def make_doc(table)
    table[1..-1].map do |record|
      record.each_with_index.each_with_object({}) do |(value, index), rec|
        rec[table[0][index]] = value
      end
    end
  end

  # Returns a copy of the table in which every cell equal to "${refname}"
  # is replaced by reftext; all other cells are kept.
  #
  # @param table [Array<Array>] rows to scan (header row included)
  # @param refname [String] placeholder name, matched as "${refname}"
  # @param reftext replacement value
  # @return [Array<Array>]
  def make_ref(table, refname, reftext)
    placeholder = "${#{refname}}"
    table.map do |record|
      record.map { |field| field == placeholder ? reftext : field }
    end
  end
end
|
data/EnData/endata.rb
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
#coding:utf-8
|
|
2
|
+
|
|
3
|
+
require 'yaml'
|
|
4
|
+
require 'json'
|
|
5
|
+
|
|
6
|
+
module EnData
  extend self

  # Reads the script's DATA section once and caches the text.
  #
  # @return [String] the cached DATA contents
  def load
    @__EnData__ ||= DATA.read
  end

  # Splits the data text into named sections and stores them by name.
  #
  # A section starts at a header line of the form
  #   [username@endata workpath]$command scriptname argument1 ...
  # and every following non-header line is appended to its 'context'.
  # Lines before the first header are collected in an entry named ''.
  #
  # @param data [String] text to parse, the cached DATA text by default
  # @return [Hash] name => section hash with the keys
  #   'user', 'path', 'handler', 'name', 'arguments', 'context'
  def parse(data = @__EnData__)
    @__datalist__ = {}
    entry = {
      'user'      => '',
      'path'      => '',
      'handler'   => '',
      'name'      => '',
      'arguments' => [],
      'context'   => []
    }
    data.split("\n").each do |line|
      if /^\[[\w|\s]*\@endata[\w|\s|\/|\~]*\]\$/.match(line)
        # A new header closes the section collected so far.
        @__datalist__[entry['name']] = entry
        front, back = line.split("@endata", 2)
        usr = front.sub("[", '').strip
        pwd = back.split("]")[0].strip
        # Limit 2 keeps an empty command empty instead of falling back to the path.
        cmd = back.split("]$", 2)[1].to_s.split(" ")
        entry = {
          'user'      => usr,
          'path'      => pwd,
          'handler'   => cmd[0],
          'name'      => cmd[1],
          'arguments' => cmd[2..-1] || [],
          'context'   => []
        }
      else
        entry['context'] << line
      end
    end
    @__datalist__[entry['name']] = entry
    @__datalist__
  end

  # Looks up a parsed section.
  #
  # @param options [Hash] with :name, a direct lookup by name; otherwise every
  #   key/value pair must equal (as strings) the section's field of that name
  # @return [Hash, nil] the named section, or the first section that matches
  def select(options = {})
    name = options[:name]
    return @__datalist__[name] if name

    matches = options.reduce(@__datalist__.values) do |sets, (key, val)|
      sets.select { |s| s[key.to_s] == val.to_s }
    end
    matches.first
  end

  # Decodes a section's context according to its handler.
  #
  # @param script [Hash, nil] a section as produced by parse
  # @return [String, Object, nil] the joined text for 'ruby' and unknown
  #   handlers, the decoded document for 'yaml'/'json', nil without context
  def source(script)
    return nil unless script && script['context']

    context = script['context'].join("\n")
    case script['handler']
    when 'yaml'
      # NOTE(review): YAML.load on untrusted text can be unsafe on older
      # Psych versions; consider YAML.safe_load if the data is not trusted.
      YAML.load context
    when 'json'
      JSON.parse context
    else # 'ruby' and plaintext are returned verbatim
      context
    end
  end

  # Evaluates inline Ruby in the context of this module.
  # NOTE(review): this is an eval; only feed it trusted script text.
  #
  # @param scripts [String, nil] source text; when nil the block is evaluated
  # @return the value of the evaluated code
  def run(scripts = nil, &script_block)
    scripts ? self.module_eval(scripts) : self.module_eval(&script_block)
  end
end
|
data/Tabot/newtab.rb
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#coding:utf-8
|
|
2
|
+
require 'json'
|
|
3
|
+
|
|
4
|
+
module Newtab
  module_function

  # Converts one Excel sheet to records by shelling out to Python/pandas:
  # the command writes JSON to a temporary file, which is read back, parsed
  # and then removed.
  #
  # NOTE(review): file, sheet and temp are interpolated unescaped into a
  # shell command line; do not pass untrusted values.
  #
  # @param file [String] path of the workbook, embedded in the Python snippet
  # @param sheet [String] sheet name, embedded in the Python snippet
  # @param env [String] shell command run before the conversion (joined with &&)
  # @param temp [String] path of the intermediate JSON file
  # @return the parsed JSON document written by the command
  # @raise [RuntimeError] when the shell command does not exit successfully
  def trans_table(file, sheet, env = 'conda activate py311', temp = '_temp.json')
    table = %Q|python -c "import pandas as pd;pd.read_excel('#{file}',sheet_name='#{sheet}').to_json('#{temp}',orient='records',force_ascii=False)"|
    succeeded = system("#{env} && #{table}")
    # Fail loudly instead of reading a missing or stale temp file.
    raise "Newtab.trans_table: conversion command failed" unless succeeded
    JSON.parse File.read(temp)
  ensure
    # Always clean up, including when parsing raised.
    File.delete(temp) if File.exist?(temp)
  end
end
|
data/Tabot/simtab.rb
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
#coding:utf-8
|
|
2
|
+
|
|
3
|
+
module SimTab
  ##################################################################
  # Overview
  # This module provides intuitive handling of two-dimensional tables,
  # which is convenient for processing reports.
  # The usual layout is [[header], [record], ...].
  # All operations work on the records, not on the header.
  # The header format is up to the user; only binding methods are provided.
  ##################################################################

  ##################################################################
  # Reading reports
  ##################################################################

  # Reads a delimited text file into an array of rows (arrays of cells).
  # If the first attempt raises, the bytes are reinterpreted as GBK and
  # transcoded to UTF-8 before splitting.
  def self.读取报表(路径, 分隔符 = "\t")
    File.read(路径).gsub("\r", "").split("\n").map { |l| l.split(分隔符) }
  rescue
    File.read(路径).force_encoding('GBK').encode("UTF-8").gsub("\r", "").split("\n").map { |l| l.split(分隔符) }
  end

  # Reads a GBK-encoded CSV file, parses it and binds its first row as header.
  # `format == :csv` switches on CSV's own header handling.
  def self.读取csv(路径, format = :raw)
    require 'csv'
    csv = File.read(路径).force_encoding("GBK").encode("UTF-8")
    content = CSV.parse(csv, headers: (format == :csv))
    content.绑定表头(content[0])
  end

  # Collects the text of every <td> of every <tr> of the tables in an HTML
  # document, one row per <tr>. Requires Nokogiri to be loaded by the caller.
  def self.读取html(文档)
    doc = Nokogiri::HTML(文档)
    table = []
    doc.css("table").css("tr").each do |tr|
      table << tr.css("td").map { |td| td.text }
    end
    table
  end

  ##################################################################
  # Header
  # If a header already exists, just bind it with #绑定表头.
  # If there is none, #表头 can generate one automatically, or call
  # #生成表头 explicitly and then #绑定表头.
  ##################################################################

  # Generates a positional header ("<prefix>0", "<prefix>1", ...) as wide as
  # the widest row, stores it and inserts it as the first row.
  #
  # @param 前缀 [String] text put in front of every column number
  # @return [Array<String>] the generated header
  def 生成表头(前缀 = '')
    width = self.inject(0) { |max, item| max < item.size ? item.size : max }
    @表头 = Array.new(width) { |i| "#{前缀}#{i}" }
    self.unshift @表头
    @表头
  end

  # Binds a header to this table and to every row that is exactly an Array.
  # The header is stored only when it is exactly an Array.
  def 绑定表头(表头)
    @表头 = 表头 if 表头.instance_of?(Array)
    self.each do |记录|
      记录.绑定表头(表头) if 记录.instance_of?(Array)
    end
  end

  # Returns the bound header, generating and binding a positional one first
  # when none is bound yet.
  def 表头
    @表头 ||= []
    绑定表头(生成表头) if @表头.empty?
    @表头
  end

  # Removes the header row. Only the formal header is dropped; the header
  # bound inline to each record is kept so it can be restored. To strip it
  # from every record as well, bind an empty header.
  def 去掉表头
    @表头 = []
    self.shift
    self
  end

  # Returns the header as "index: name" lines.
  def 展示表头
    @表头.each_with_index.map { |c, i| "#{"%02s" % i}: #{c}" }.join("\n")
  end

  # Special two-level header: collapsed into a single level ("group#name")
  # for computation. A single-level header is returned unchanged.
  # An empty group cell inherits the group of the column to its left.
  def self.解析双层表头(列表)
    return 列表 unless 列表[0].instance_of?(Array)

    表头 = []
    if 列表.size == 2
      列表[0].each_with_index do |item, index|
        head = if item != ''
                 item
               elsif index == 0
                 ''
               else
                 表头[-1].split("#")[0]
               end
        表头 << [head, 列表[1][index]].join("#")
      end
    end
    # Extra columns of the second row inherit the last group.
    if 列表[0].size < 列表[1].size
      num = 列表[1].size - 列表[0].size
      head = 表头[-1].split("#")[0]
      表头 += 列表[1][-1 * num..-1].map { |i| "#{head}##{i}" }
    end
    表头
  end

  # Special two-level header: expanded back into two rows before output.
  # A single-level header is returned as [header].
  def self.生成双层表头(列表)
    表头 = 列表.map { |复合表头| 复合表头.split("#") }
    return [列表] if 表头[0].size == 1

    [表头.map { |items| items[0] }, 表头.map { |items| items[1] }]
  end

  ##################################################################
  # Query (rows), select (columns), sort
  # Every result here is a new table.
  ##################################################################

  # New table with the rows whose cell in the given column equals the content.
  def 字段查询(字段, 内容)
    索引 = self.表头.index(字段)
    新表 = [self.表头] + self.select { |c| c[索引] == 内容 }
    新表.绑定表头 self.表头
    新表
  end

  # New table with the records for which the block returns a truthy value.
  def 查询
    结果 = self[1..-1].select { |记录| yield(记录) }
    新表 = [self.表头] + 结果
    新表.绑定表头 self.表头
    新表
  end

  # New table sorted by the string key the block returns for each record.
  # The block may return [key, :reverse] to reverse the final order; the
  # flag returned for the last record is the one that counts.
  def 自定义排序
    字典, 逆序 = {}, nil
    self[1..-1].each_with_index do |记录, 索引|
      键, 逆序 = yield(记录)
      # The zero-padded position keeps equal keys distinct and in input order.
      字典[键 + "_#{"%010d" % 索引}"] = 索引 + 1
    end
    结果 = 字典.keys.sort.map { |键| self[字典[键]] }
    结果.reverse! if 逆序 == :reverse
    新表 = [self[0]] + 结果
    新表.绑定表头 self.表头
    新表
  end

  # New table with the records in their natural sort order.
  def 排序
    新表 = [self[0]] + self[1..-1].sort
    新表.绑定表头 self.表头
    新表
  end

  # New table with the records in reverse order.
  def 逆序
    新表 = [self[0]] + self[1..-1].reverse
    新表.绑定表头 self.表头
    新表
  end

  # Lookup via .字段(index | "column" | ...): each attribute found in the
  # bound header yields self[position]; an attribute that is not found is
  # used as a placeholder itself. A single attribute returns a single value.
  def 字段(*属性表)
    字段表, 新表头 = [], []
    属性表.each do |属性|
      if 索引 = self.表头.index(属性)
        字段表 << self[索引]
        新表头 << 属性
      else
        字段表 << 属性
        新表头 << 属性.to_s
      end
    end
    return 字段表[0] unless 字段表.size > 1

    字段表.绑定表头 新表头
    字段表
  end

  # In set terms "select" picks the elements that meet a condition; in SQL it
  # projects onto columns. This method means the latter (the former is 查询).
  # Each argument is a column number, a column name, or a filler value that
  # is repeated in every row when it names no column.
  def 选择(*字段列表)
    字段索引 = 字段列表.map { |f| (f.instance_of?(Integer) ? f : self.表头.index(f)) || f }
    # A column number is shown under its header name; fall back to the argument.
    新表头 = 字段列表.map { |f| (f.instance_of?(Integer) ? self.表头[f] : f) || f }
    新表 = [新表头] + self[1..-1].map do |记录|
      字段索引.map { |索引| 索引.instance_of?(Integer) ? 记录[索引] : 索引 }
    end
    新表.绑定表头 新表头
    新表
  end

  # Cartesian product of the records of both tables under the joined header.
  def 拼接(另一个表)
    新表头 = self.表头 + 另一个表.表头
    新表 = [新表头] + self[1..-1].product(另一个表[1..-1]).map { |t1, t2| t1 + t2 }
    新表.绑定表头(新表头)
    新表
  end

  ##################################################################
  # Statistics
  ##################################################################

  # Groups the records by their value in the given column of the first row.
  # Returns an empty Hash when the column is not found.
  def 统计(字段)
    结果 = {}
    索引 = self[0].index(字段)
    self[1..-1].each do |记录|
      (结果[记录[索引]] ||= []) << 记录
    end if 索引
    结果
  end

  # Writes per-column value counts into files below the given directory,
  # plus one summary file with the number of distinct values per column.
  def 字段条目数统计(路径 = '.')
    字段唯一统计 = []
    Dir.mkdir(路径) unless File.exist?(路径)
    self[0].each do |字段|
      统计表 = self.统计 字段
      字段数值统计 = 统计表.map { |字段值, 条目| "#{字段值}\t:\t#{条目.size}" }
      字段唯一统计 << "#{字段}\t:\t#{统计表.keys.size}"
      Dir.mkdir("#{路径}/字段数值统计") unless File.exist?("#{路径}/字段数值统计")
      File.write "#{路径}/字段数值统计/#{字段}(#{统计表.keys.size}).txt", 字段数值统计.join("\n")
    end
    File.write "#{路径}/字段唯一统计.txt", 字段唯一统计.join("\n")
  end

  # Turns every record into a Hash keyed by header name, with the 1-based
  # record number stored under 'row'.
  def 建立档案
    self[1..-1].each_with_index.map do |记录, 记录索引|
      record = { 'row' => 记录索引 + 1 }
      记录.each_with_index do |字段, 字段索引|
        record[self.表头[字段索引]] = 字段
      end
      record
    end
  end

  ##################################################################
  # Layout
  ##################################################################

  # Renders the table as tab-separated lines.
  def 展示列表
    self.map { |记录| 记录.join("\t") }.join("\n")
  end
end
|
|
254
|
+
|
|
255
|
+
# Mixes the SimTab table operations into every Array.
# The reader declared here lives on Array itself, so for arrays it is found
# before the 表头 method that SimTab defines.
class Array
  include SimTab
  attr_reader :表头
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#coding:utf-8
|
|
2
|
+
# require 'custom-core/string'
|
|
3
|
+
|
|
4
|
+
module ABString
  # {:;} = Left Colon and Semicolon to Right Brace (LCSRB)
  # Parses every top-level block of the form
  #   {name : params ; body}
  # The params part (comma separated) is optional: {name : body}.
  # Only the first ":" and the first ";" act as separators, so the body may
  # itself contain colons and semicolons.
  #
  # @param string [String] text containing LCSRB blocks
  # @return [Hash] name => [name, params, body]
  def parse_lcsrb(string)
    head = "{"
    tail = "}"
    sequences = TextAbstract.match_cascade(string.gsub("\r", ""), head, tail)
    blocks = sequences.select { |i| i.instance_of?(Array) }

    blocks.each_with_object({}) do |block, table|
      # block[1..-2] drops the opening and closing markers.
      prename, prebody = block[1..-2].join.split(":", 2)
      name = diet(prename.to_s)
      prebody = prebody.to_s # a block without ":" has an empty body
      if prebody.include?(';')
        preparams, postbody = prebody.split(";", 2)
        params = preparams.to_s.split(",").map { |r| diet(r) }
      else
        params, postbody = [], prebody
      end
      body = diet(postbody.to_s).split("\n").map { |line| diet(line) }.join("\n")
      table[name] = [name, params, body]
    end
  end

  # Dispatches on the requested syntax; unknown options return the string.
  #
  # @param string [String] text to parse
  # @param option [Symbol] :lcsrb selects parse_lcsrb
  def parse(string, option)
    case option
    when :lcsrb
      parse_lcsrb(string)
    else
      string
    end
  end

  # Strips surrounding whitespace and turns every tab into a space.
  def diet(string)
    string.strip.gsub("\t", " ")
  end

  module_function :parse, :diet, :parse_lcsrb
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#coding:utf-8
|
|
2
|
+
|
|
3
|
+
module TextAbstract

  # Ported from the former facility/helpers/String::draw_fragments.
  # Legacy method for a specific environment; not recommended for general use.
  #
  # Works line by line: a line matching pre_flag opens a fragment, a line
  # matching post_flag closes it (both lines belong to the fragment).
  # Lines outside fragments form the context.
  #
  # @param text [String] input text
  # @param pre_flag [Regexp] opens a fragment
  # @param post_flag [Regexp] closes a fragment
  # @return [Array(String, Array<String>)] joined context and the fragments
  def self.draw_fragments(text, pre_flag, post_flag)
    context, fragments = [], []
    temp, flag = "", false
    text.each_line do |line|
      if pre_flag.match(line)
        temp = String.new unless flag
        flag = true
      end
      if flag
        temp << line # based on whole lines!
      else
        context << line
      end
      if post_flag.match(line)
        flag = false
        fragments << temp
        temp = String.new
      end
    end
    context << temp if flag # pre_flag seen but never a post_flag
    fragments.delete("")
    return context.join, fragments
  end

  # Rewrite of the former facility/helpers/String::match_fragments.
  # start and finish are strictly ordered: a new paragraph cannot begin
  # before the current one has finished.
  #
  # Marker positions are located with String#index, so paragraphs that are
  # empty or sit at the very end of the text are handled as well.
  #
  # @param text [String] input text
  # @param start [String] opening marker
  # @param finish [String] closing marker
  # @return [Array<String>] the texts between each start/finish pair
  def self.match_paragraph(text, start, finish)
    paragraphs = []
    return paragraphs if start.empty? || finish.empty? # nothing sensible to match

    cursor = 0
    while (opened = text.index(start, cursor))
      body_from = opened + start.size
      closed = text.index(finish, body_from)
      break unless closed # strict: an unfinished paragraph is dropped

      paragraphs << text[body_from...closed]
      cursor = closed + finish.size
    end
    paragraphs
  end

  # start and finish may nest (one start/finish pair inside another), while
  # the text as a whole is still processed in order.
  #
  # Returns a tree: plain text pieces are Strings, every start..finish region
  # is a nested Array beginning with the start marker and ending with the
  # finish marker. Returns [] when the text contains neither marker.
  #
  # @param text [String] input text
  # @param start [String] opening marker
  # @param finish [String] closing marker
  # @return [Array] nested arrays of strings
  def self.match_cascade(text, start, finish)
    paragraphs = []
    return paragraphs if start.empty? || finish.empty? # would never advance
    return paragraphs unless text.include?(start) || text.include?(finish)

    current_text = text
    current_pos, parent_pos = paragraphs, [paragraphs]
    until current_text == ""
      st_pos = current_text.index(start) || current_text.size
      fn_pos = current_text.index(finish) || current_text.size
      if fn_pos == st_pos # no marker left, or the markers contain each other
        current_pos << current_text
        current_text = ""
      elsif fn_pos < st_pos # finish comes first
        current_pos << current_text[0...fn_pos]
        current_pos << finish # this level is complete
        # Go back up one level; surplus finish markers stay at the top level.
        current_pos = parent_pos.pop || paragraphs
        current_text = current_text[(fn_pos + finish.size)..-1]
      else # start comes first
        leading = current_text[0...st_pos]
        current_pos << leading unless leading == "" # text of this level
        current_pos << [start]                      # open the next level
        parent_pos << current_pos                   # remember this level
        current_pos = current_pos[-1]
        current_text = current_text[(st_pos + start.size)..-1]
      end
    end
    paragraphs
  end

  # match_cascade with "<tag" as start marker and "</tag>" as finish marker.
  def self.match_html_tag(html, tag)
    self.match_cascade(html, "<#{tag}", "</#{tag}>")
  end

end
|