crawlab_ruby_sdk 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -0
- data/lib/crawlab_ruby_sdk/version.rb +1 -1
- data/lib/crawlab_ruby_sdk.rb +17 -3
- data/lib/models/base.rb +5 -2
- data/lib/models/thinktank_expert.rb +34 -1
- data/lib/models/thinktank_expert_report.rb +37 -1
- data/lib/models/thinktank_information.rb +38 -1
- data/lib/models/thinktank_report.rb +37 -1
- data/lib/verify/verify.rb +40 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5c857dd2f243d47318786c542faec1a9fc7a6a52bba0edf7523313379b4f2d4
|
4
|
+
data.tar.gz: fbbacb701ad2bd5d090d85ba3322cd82a5c02f5d7d0d3ab729ee2b2b0d0882c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f9752ef0ad2e59d72fa870a6493a8d831716c0422bb4d6d08a0afc3cadc08ee9b4b80eb97af5192ab1094ad7884533b3c7c75cc76d1a47fe7c0b90f991538d0
|
7
|
+
data.tar.gz: 47e6a0373338ad0d32b8286d2b422da55a67d5728655fca51a028a17928ee301b981d17cb396e06d23f8911ac37663b471360f8ecce97e55b7f8595e4e8096c1
|
data/README.md
CHANGED
@@ -39,6 +39,19 @@ Or install it yourself as:
|
|
39
39
|
puts bucket_url
|
40
40
|
```
|
41
41
|
|
42
|
+
##### 字段验证规则
|
43
|
+
|
44
|
+
```
|
45
|
+
CrawlabRubySdk.save_item({name: "haha",age:12},table_name)
|
46
|
+
CrawlabRubySdk.save_items([{name: "haha",age:12},{name:"456",age:34}],table_name)
|
47
|
+
table_name 可以是这几个["thinktank_expert_reports", "thinktank_informations", "thinktank_experts", "thinktank_reports"] 中的一个,也可以不传递
|
48
|
+
|
49
|
+
1、如果当前采集的结果 存储的数据表是上面几个中的一个,无论是否传递,都会按照对应的表结构的 字段规则验证字段
|
50
|
+
2、如果当前采集的结果,存储的数据表不属于上面几个中的一个,则按照传递传递的table_name,对应的字段规则验证字段
|
51
|
+
3、如果没有传递table_name,并且采集的结果存储临时表,则根据推送的结果的字段匹配 上面四个表的字段,匹配上了就按照匹配上的表结构的字段规则验证,没有匹配上则不验证
|
52
|
+
```
|
53
|
+
|
54
|
+
|
42
55
|
## Development
|
43
56
|
|
44
57
|
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/crawlab_ruby_sdk.rb
CHANGED
@@ -36,6 +36,7 @@ module CrawlabRubySdk
|
|
36
36
|
auth = "Crawlab2021!"
|
37
37
|
end
|
38
38
|
|
39
|
+
table_name = get_table_name(table_name)
|
39
40
|
if !Verify.IsVerified?([item],table_name)
|
40
41
|
return
|
41
42
|
end
|
@@ -57,7 +58,9 @@ module CrawlabRubySdk
|
|
57
58
|
if auth==nil || auth == ""
|
58
59
|
auth = "Crawlab2021!"
|
59
60
|
end
|
60
|
-
|
61
|
+
|
62
|
+
table_name = get_table_name(table_name)
|
63
|
+
if !Verify.IsVerified?(items,table_name)
|
61
64
|
return
|
62
65
|
end
|
63
66
|
|
@@ -99,10 +102,11 @@ module CrawlabRubySdk
|
|
99
102
|
|
100
103
|
data = {task_id: task_id,data:records}.to_json.b
|
101
104
|
|
105
|
+
# puts "=====#{{task_id: task_id,data:records}.to_json}==="
|
106
|
+
|
102
107
|
msg = Grpc::StreamMessage.new(code:3,data:data)
|
103
|
-
puts data
|
104
108
|
|
105
|
-
|
109
|
+
sub_client.Send([msg])
|
106
110
|
end
|
107
111
|
|
108
112
|
def self.get_task_id
|
@@ -120,4 +124,14 @@ module CrawlabRubySdk
|
|
120
124
|
def self.save_file_stream_to_oss(oss_path,stream)
|
121
125
|
OssServerClient.new.send_stream(oss_path,stream)
|
122
126
|
end
|
127
|
+
|
128
|
+
def self.get_table_name(table_name="")
|
129
|
+
table_names = Verify.table_names
|
130
|
+
if ENV["TABLE_NAME"] != nil && ENV["TABLE_NAME"] != "" && table_names.include?(ENV["TABLE_NAME"])
|
131
|
+
table_name = ENV["TABLE_NAME"]
|
132
|
+
return table_name
|
133
|
+
end
|
134
|
+
|
135
|
+
return table_name
|
136
|
+
end
|
123
137
|
end
|
data/lib/models/base.rb
CHANGED
@@ -85,6 +85,7 @@ class BaseModel
|
|
85
85
|
rescue StandardError => e
|
86
86
|
return false
|
87
87
|
end
|
88
|
+
return true
|
88
89
|
end
|
89
90
|
|
90
91
|
def verify_string(v)
|
@@ -161,7 +162,7 @@ class BaseModel
|
|
161
162
|
return false
|
162
163
|
end
|
163
164
|
fields_arr = fn.split(":")
|
164
|
-
if
|
165
|
+
if fields_arr.size < 2
|
165
166
|
return false
|
166
167
|
end
|
167
168
|
fields_str = fields_arr[1]
|
@@ -169,15 +170,17 @@ class BaseModel
|
|
169
170
|
fields = fields_str.split(",")
|
170
171
|
|
171
172
|
begin
|
172
|
-
datas =
|
173
|
+
datas = JSON.parse(v)
|
173
174
|
datas.each do |data|
|
174
175
|
fields.each do |field|
|
175
176
|
if data[field] == nil
|
177
|
+
puts "ERROR #{field} not Exist!"
|
176
178
|
return false
|
177
179
|
end
|
178
180
|
end
|
179
181
|
end
|
180
182
|
rescue StandardError => e
|
183
|
+
puts e
|
181
184
|
return false
|
182
185
|
end
|
183
186
|
|
@@ -41,7 +41,40 @@ class ThinktankExpert < BaseModel
|
|
41
41
|
|
42
42
|
|
43
43
|
def initialize(options={})
|
44
|
-
|
44
|
+
options = JSON.parse(options.to_json)
|
45
|
+
@id = options["id"]
|
46
|
+
@name = options["name"]
|
47
|
+
@title = options["title"]
|
48
|
+
@content = options["content"]
|
49
|
+
@location = options["location"]
|
50
|
+
@area_of_expertise = options["area_of_expertise"]
|
51
|
+
@profile_images = options["profile_images"]
|
52
|
+
@phone = options["phone"]
|
53
|
+
@email = options["email"]
|
54
|
+
@link = options["link"]
|
55
|
+
@audios = options["audios"]
|
56
|
+
@videos = options["videos"]
|
57
|
+
@education = options["education"]
|
58
|
+
@related_topics = options["related_topics"]
|
59
|
+
@site_name = options["site_name"]
|
60
|
+
@site_name_cn = options["site_name_cn"]
|
61
|
+
@domain = options["domain"]
|
62
|
+
@created_at = options["created_at"]
|
63
|
+
@updated_at = options["updated_at"]
|
64
|
+
@source = options["source"]
|
65
|
+
@oss_profile_images = options["oss_profile_images"]
|
66
|
+
@facebook = options["facebook"]
|
67
|
+
@twitter = options["twitter"]
|
68
|
+
@linkedin = options["linkedin"]
|
69
|
+
@instagram = options["instagram"]
|
70
|
+
@wikidata = options["wikidata"]
|
71
|
+
@person_type = options["person_type"]
|
72
|
+
@files = options["files"]
|
73
|
+
@oss_files = options["oss_files"]
|
74
|
+
@associated_program = options["associated_program"]
|
75
|
+
@lang = options["lang"]
|
76
|
+
@website = options["website"]
|
77
|
+
@nationalities = options["nationalities"]
|
45
78
|
end
|
46
79
|
|
47
80
|
def as_json
|
@@ -42,7 +42,43 @@ class ThinktankExpertReport < BaseModel
|
|
42
42
|
register
|
43
43
|
|
44
44
|
def initialize(options={})
|
45
|
-
|
45
|
+
options = JSON.parse(options.to_json)
|
46
|
+
@id = options["id"]
|
47
|
+
@title = options["title"]
|
48
|
+
@site_name = options["site_name"]
|
49
|
+
@site_name_cn = options["site_name_cn"]
|
50
|
+
@abstract = options["abstract"]
|
51
|
+
@content = options["content"]
|
52
|
+
@author_names = options["author_names"]
|
53
|
+
@source = options["source"]
|
54
|
+
@files = options["files"]
|
55
|
+
@images = options["images"]
|
56
|
+
@videos = options["videos"]
|
57
|
+
@audios = options["audios"]
|
58
|
+
@links = options["links"]
|
59
|
+
@domain = options["domain"]
|
60
|
+
@keywords= options["keywords"]
|
61
|
+
@html_content = options["html_content"]
|
62
|
+
@lang = options["lang"]
|
63
|
+
@country_cn = options["country_cn"]
|
64
|
+
@country_code = options["country_code"]
|
65
|
+
@created_at = options["created_at"]
|
66
|
+
@updated_at = options["updated_at"]
|
67
|
+
@created_time = options["created_time"]
|
68
|
+
@oss_files = options["oss_files"]
|
69
|
+
@oss_images = options["oss_images"]
|
70
|
+
@customer_category = options["customer_category"]
|
71
|
+
@category = options["category"]
|
72
|
+
@topics = options["topics"]
|
73
|
+
@tags = options["tags"]
|
74
|
+
@views = options["views"]
|
75
|
+
@comments = options["comments"]
|
76
|
+
@reference = options["reference"]
|
77
|
+
@mention_country = options["mention_country"]
|
78
|
+
@authors = options["authors"]
|
79
|
+
@sub_title = options["sub_title"]
|
80
|
+
@timezone = options["timezone"]
|
81
|
+
@timezone_location = options["timezone_location"]
|
46
82
|
end
|
47
83
|
|
48
84
|
def as_json
|
@@ -44,7 +44,44 @@ class ThinktankInformation < BaseModel
|
|
44
44
|
|
45
45
|
|
46
46
|
def initialize(options={})
|
47
|
-
|
47
|
+
options = JSON.parse(options.to_json)
|
48
|
+
@id = options["id"]
|
49
|
+
@title = options["title"]
|
50
|
+
@site_name = options["site_name"]
|
51
|
+
@site_name_cn = options["site_name_cn"]
|
52
|
+
@abstract = options["abstract"]
|
53
|
+
@content = options["content"]
|
54
|
+
@author_names = options["author_names"]
|
55
|
+
@state_info = options["state_info"]
|
56
|
+
@source = options["source"]
|
57
|
+
@files = options["files"]
|
58
|
+
@images = options["images"]
|
59
|
+
@videos = options["videos"]
|
60
|
+
@audios = options["audios"]
|
61
|
+
@links = options["links"]
|
62
|
+
@domain = options["domain"]
|
63
|
+
@keywords = options["keywords"]
|
64
|
+
@html_content = options["html_content"]
|
65
|
+
@lang = options["lang"]
|
66
|
+
@country_cn = options["country_cn"]
|
67
|
+
@country_code = options["country_code"]
|
68
|
+
@created_at = options["created_at"]
|
69
|
+
@updated_at = options["updated_at"]
|
70
|
+
@created_time = options["created_time"]
|
71
|
+
@oss_files = options["oss_files"]
|
72
|
+
@oss_images = options["oss_images"]
|
73
|
+
@customer_category = options["customer_category"]
|
74
|
+
@category = options["category"]
|
75
|
+
@topics = options["topics"]
|
76
|
+
@tags = options["tags"]
|
77
|
+
@views = options["views"]
|
78
|
+
@comments = options["comments"]
|
79
|
+
@reference = options["reference"]
|
80
|
+
@mention_country = options["mention_country"]
|
81
|
+
@authors = options["authors"]
|
82
|
+
@sub_title = options["sub_title"]
|
83
|
+
@timezone = options["timezone"]
|
84
|
+
@timezone_location = options["timezone_location"]
|
48
85
|
end
|
49
86
|
|
50
87
|
def as_json
|
@@ -43,7 +43,43 @@ class ThinktankReport < BaseModel
|
|
43
43
|
|
44
44
|
|
45
45
|
def initialize(options = {})
|
46
|
-
|
46
|
+
options = JSON.parse(options.to_json)
|
47
|
+
@id = options["id"]
|
48
|
+
@title = options["title"]
|
49
|
+
@site_name = options["site_name"]
|
50
|
+
@site_name_cn = options["site_name_cn"]
|
51
|
+
@abstract = options["abstract"]
|
52
|
+
@content = options["content"]
|
53
|
+
@author_names = options["author_names"]
|
54
|
+
@source = options["source"]
|
55
|
+
@files = options["files"]
|
56
|
+
@images = options["images"]
|
57
|
+
@videos = options["videos"]
|
58
|
+
@audios = options["audios"]
|
59
|
+
@links = options["links"]
|
60
|
+
@domain = options["domain"]
|
61
|
+
@keywords = options["keywords"]
|
62
|
+
@html_content = options["html_content"]
|
63
|
+
@lang = options["lang"]
|
64
|
+
@country_cn = options["country_cn"]
|
65
|
+
@country_code = options["country_code"]
|
66
|
+
@created_at = options["created_at"]
|
67
|
+
@updated_at = options["updated_at"]
|
68
|
+
@created_time = options["created_time"]
|
69
|
+
@oss_files = options["oss_files"]
|
70
|
+
@oss_images = options["oss_images"]
|
71
|
+
@customer_category = options["customer_category"]
|
72
|
+
@category = options["category"]
|
73
|
+
@topics = options["topics"]
|
74
|
+
@tags = options["tags"]
|
75
|
+
@views = options["views"]
|
76
|
+
@comments = options["comments"]
|
77
|
+
@reference = options["reference"]
|
78
|
+
@mention_country = options["mention_country"]
|
79
|
+
@authors = options["authors"]
|
80
|
+
@sub_title = options["sub_title"]
|
81
|
+
@timezone = options["timezone"]
|
82
|
+
@timezone_location = options["timezone_location"]
|
47
83
|
end
|
48
84
|
|
49
85
|
def to_json
|
data/lib/verify/verify.rb
CHANGED
@@ -3,6 +3,10 @@ class Verify
|
|
3
3
|
|
4
4
|
if table_name == "" && items.size > 0
|
5
5
|
table_name = switch_table(items[0])
|
6
|
+
elsif table_name != "" && items.size > 0
|
7
|
+
if !match_fields?(items[0],table_name)
|
8
|
+
return
|
9
|
+
end
|
6
10
|
end
|
7
11
|
|
8
12
|
if table_name == ""
|
@@ -50,4 +54,40 @@ class Verify
|
|
50
54
|
return table_name
|
51
55
|
end
|
52
56
|
|
57
|
+
def self.match_fields?(item={},table_name)
|
58
|
+
if table_name == nil || table_name == ""
|
59
|
+
return false
|
60
|
+
end
|
61
|
+
item_keys = JSON.parse(item.to_json).keys
|
62
|
+
v = $map_models[table_name]
|
63
|
+
|
64
|
+
if v == nil
|
65
|
+
puts "ERROR #{table_name} not Exist!"
|
66
|
+
return false
|
67
|
+
end
|
68
|
+
|
69
|
+
klass = Object.const_get v
|
70
|
+
klass_keys = klass.fields
|
71
|
+
|
72
|
+
# puts "----#{klass_keys}--"
|
73
|
+
sub_keys1 = item_keys - klass_keys
|
74
|
+
if sub_keys1.size > 0
|
75
|
+
puts "ERROR #{sub_keys1} do not belong #{table_name}"
|
76
|
+
return false
|
77
|
+
end
|
78
|
+
|
79
|
+
sub_keys2 = klass_keys - item_keys
|
80
|
+
if sub_keys2.size > 0
|
81
|
+
puts "ERROR #{sub_keys2} do not Exist!"
|
82
|
+
return false
|
83
|
+
end
|
84
|
+
|
85
|
+
return true
|
86
|
+
|
87
|
+
end
|
88
|
+
|
89
|
+
def self.table_names
|
90
|
+
return $map_models.keys
|
91
|
+
end
|
92
|
+
|
53
93
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crawlab_ruby_sdk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- min
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-07-
|
11
|
+
date: 2023-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: grpc
|