crawlab_ruby_sdk 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +13 -0
- data/lib/crawlab_ruby_sdk/version.rb +1 -1
- data/lib/crawlab_ruby_sdk.rb +17 -3
- data/lib/models/base.rb +5 -2
- data/lib/models/thinktank_expert.rb +34 -1
- data/lib/models/thinktank_expert_report.rb +37 -1
- data/lib/models/thinktank_information.rb +38 -1
- data/lib/models/thinktank_report.rb +37 -1
- data/lib/verify/verify.rb +40 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5c857dd2f243d47318786c542faec1a9fc7a6a52bba0edf7523313379b4f2d4
|
4
|
+
data.tar.gz: fbbacb701ad2bd5d090d85ba3322cd82a5c02f5d7d0d3ab729ee2b2b0d0882c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f9752ef0ad2e59d72fa870a6493a8d831716c0422bb4d6d08a0afc3cadc08ee9b4b80eb97af5192ab1094ad7884533b3c7c75cc76d1a47fe7c0b90f991538d0
|
7
|
+
data.tar.gz: 47e6a0373338ad0d32b8286d2b422da55a67d5728655fca51a028a17928ee301b981d17cb396e06d23f8911ac37663b471360f8ecce97e55b7f8595e4e8096c1
|
data/README.md
CHANGED
@@ -39,6 +39,19 @@ Or install it yourself as:
|
|
39
39
|
puts bucket_url
|
40
40
|
```
|
41
41
|
|
42
|
+
##### 字段验证规则
|
43
|
+
|
44
|
+
```
|
45
|
+
CrawlabRubySdk.save_item({name: "haha",age:12},table_name)
|
46
|
+
CrawlabRubySdk.save_items([{name: "haha",age:12},{name:"456",age:34}],table_name)
|
47
|
+
table_name 可以是这几个["thinktank_expert_reports", "thinktank_informations", "thinktank_experts", "thinktank_reports"] 中的一个,也可以不传递
|
48
|
+
|
49
|
+
1、如果当前采集的结果 存储的数据表是上面几个中的一个,无论是否传递,都会按照对应的表结构的 字段规则验证字段
|
50
|
+
2、如果当前采集的结果,存储的数据表不属于上面几个中的一个,则按照传递的table_name,对应的字段规则验证字段
|
51
|
+
3、如果没有传递table_name,并且采集的结果存储临时表,则根据推送的结果的字段匹配 上面四个表的字段,匹配上了就按照匹配上的表结构的字段规则验证,没有匹配上则不验证
|
52
|
+
```
|
53
|
+
|
54
|
+
|
42
55
|
## Development
|
43
56
|
|
44
57
|
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/crawlab_ruby_sdk.rb
CHANGED
@@ -36,6 +36,7 @@ module CrawlabRubySdk
|
|
36
36
|
auth = "Crawlab2021!"
|
37
37
|
end
|
38
38
|
|
39
|
+
table_name = get_table_name(table_name)
|
39
40
|
if !Verify.IsVerified?([item],table_name)
|
40
41
|
return
|
41
42
|
end
|
@@ -57,7 +58,9 @@ module CrawlabRubySdk
|
|
57
58
|
if auth==nil || auth == ""
|
58
59
|
auth = "Crawlab2021!"
|
59
60
|
end
|
60
|
-
|
61
|
+
|
62
|
+
table_name = get_table_name(table_name)
|
63
|
+
if !Verify.IsVerified?(items,table_name)
|
61
64
|
return
|
62
65
|
end
|
63
66
|
|
@@ -99,10 +102,11 @@ module CrawlabRubySdk
|
|
99
102
|
|
100
103
|
data = {task_id: task_id,data:records}.to_json.b
|
101
104
|
|
105
|
+
# puts "=====#{{task_id: task_id,data:records}.to_json}==="
|
106
|
+
|
102
107
|
msg = Grpc::StreamMessage.new(code:3,data:data)
|
103
|
-
puts data
|
104
108
|
|
105
|
-
|
109
|
+
sub_client.Send([msg])
|
106
110
|
end
|
107
111
|
|
108
112
|
def self.get_task_id
|
@@ -120,4 +124,14 @@ module CrawlabRubySdk
|
|
120
124
|
def self.save_file_stream_to_oss(oss_path,stream)
|
121
125
|
OssServerClient.new.send_stream(oss_path,stream)
|
122
126
|
end
|
127
|
+
|
128
|
+
def self.get_table_name(table_name="")
|
129
|
+
table_names = Verify.table_names
|
130
|
+
if ENV["TABLE_NAME"] != nil && ENV["TABLE_NAME"] != "" && table_names.include?(ENV["TABLE_NAME"])
|
131
|
+
table_name = ENV["TABLE_NAME"]
|
132
|
+
return table_name
|
133
|
+
end
|
134
|
+
|
135
|
+
return table_name
|
136
|
+
end
|
123
137
|
end
|
data/lib/models/base.rb
CHANGED
@@ -85,6 +85,7 @@ class BaseModel
|
|
85
85
|
rescue StandardError => e
|
86
86
|
return false
|
87
87
|
end
|
88
|
+
return true
|
88
89
|
end
|
89
90
|
|
90
91
|
def verify_string(v)
|
@@ -161,7 +162,7 @@ class BaseModel
|
|
161
162
|
return false
|
162
163
|
end
|
163
164
|
fields_arr = fn.split(":")
|
164
|
-
if
|
165
|
+
if fields_arr.size < 2
|
165
166
|
return false
|
166
167
|
end
|
167
168
|
fields_str = fields_arr[1]
|
@@ -169,15 +170,17 @@ class BaseModel
|
|
169
170
|
fields = fields_str.split(",")
|
170
171
|
|
171
172
|
begin
|
172
|
-
datas =
|
173
|
+
datas = JSON.parse(v)
|
173
174
|
datas.each do |data|
|
174
175
|
fields.each do |field|
|
175
176
|
if data[field] == nil
|
177
|
+
puts "ERROR #{field} not Exist!"
|
176
178
|
return false
|
177
179
|
end
|
178
180
|
end
|
179
181
|
end
|
180
182
|
rescue StandardError => e
|
183
|
+
puts e
|
181
184
|
return false
|
182
185
|
end
|
183
186
|
|
@@ -41,7 +41,40 @@ class ThinktankExpert < BaseModel
|
|
41
41
|
|
42
42
|
|
43
43
|
def initialize(options={})
|
44
|
-
|
44
|
+
options = JSON.parse(options.to_json)
|
45
|
+
@id = options["id"]
|
46
|
+
@name = options["name"]
|
47
|
+
@title = options["title"]
|
48
|
+
@content = options["content"]
|
49
|
+
@location = options["location"]
|
50
|
+
@area_of_expertise = options["area_of_expertise"]
|
51
|
+
@profile_images = options["profile_images"]
|
52
|
+
@phone = options["phone"]
|
53
|
+
@email = options["email"]
|
54
|
+
@link = options["link"]
|
55
|
+
@audios = options["audios"]
|
56
|
+
@videos = options["videos"]
|
57
|
+
@education = options["education"]
|
58
|
+
@related_topics = options["related_topics"]
|
59
|
+
@site_name = options["site_name"]
|
60
|
+
@site_name_cn = options["site_name_cn"]
|
61
|
+
@domain = options["domain"]
|
62
|
+
@created_at = options["created_at"]
|
63
|
+
@updated_at = options["updated_at"]
|
64
|
+
@source = options["source"]
|
65
|
+
@oss_profile_images = options["oss_profile_images"]
|
66
|
+
@facebook = options["facebook"]
|
67
|
+
@twitter = options["twitter"]
|
68
|
+
@linkedin = options["linkedin"]
|
69
|
+
@instagram = options["instagram"]
|
70
|
+
@wikidata = options["wikidata"]
|
71
|
+
@person_type = options["person_type"]
|
72
|
+
@files = options["files"]
|
73
|
+
@oss_files = options["oss_files"]
|
74
|
+
@associated_program = options["associated_program"]
|
75
|
+
@lang = options["lang"]
|
76
|
+
@website = options["website"]
|
77
|
+
@nationalities = options["nationalities"]
|
45
78
|
end
|
46
79
|
|
47
80
|
def as_json
|
@@ -42,7 +42,43 @@ class ThinktankExpertReport < BaseModel
|
|
42
42
|
register
|
43
43
|
|
44
44
|
def initialize(options={})
|
45
|
-
|
45
|
+
options = JSON.parse(options.to_json)
|
46
|
+
@id = options["id"]
|
47
|
+
@title = options["title"]
|
48
|
+
@site_name = options["site_name"]
|
49
|
+
@site_name_cn = options["site_name_cn"]
|
50
|
+
@abstract = options["abstract"]
|
51
|
+
@content = options["content"]
|
52
|
+
@author_names = options["author_names"]
|
53
|
+
@source = options["source"]
|
54
|
+
@files = options["files"]
|
55
|
+
@images = options["images"]
|
56
|
+
@videos = options["videos"]
|
57
|
+
@audios = options["audios"]
|
58
|
+
@links = options["links"]
|
59
|
+
@domain = options["domain"]
|
60
|
+
@keywords= options["keywords"]
|
61
|
+
@html_content = options["html_content"]
|
62
|
+
@lang = options["lang"]
|
63
|
+
@country_cn = options["country_cn"]
|
64
|
+
@country_code = options["country_code"]
|
65
|
+
@created_at = options["created_at"]
|
66
|
+
@updated_at = options["updated_at"]
|
67
|
+
@created_time = options["created_time"]
|
68
|
+
@oss_files = options["oss_files"]
|
69
|
+
@oss_images = options["oss_images"]
|
70
|
+
@customer_category = options["customer_category"]
|
71
|
+
@category = options["category"]
|
72
|
+
@topics = options["topics"]
|
73
|
+
@tags = options["tags"]
|
74
|
+
@views = options["views"]
|
75
|
+
@comments = options["comments"]
|
76
|
+
@reference = options["reference"]
|
77
|
+
@mention_country = options["mention_country"]
|
78
|
+
@authors = options["authors"]
|
79
|
+
@sub_title = options["sub_title"]
|
80
|
+
@timezone = options["timezone"]
|
81
|
+
@timezone_location = options["timezone_location"]
|
46
82
|
end
|
47
83
|
|
48
84
|
def as_json
|
@@ -44,7 +44,44 @@ class ThinktankInformation < BaseModel
|
|
44
44
|
|
45
45
|
|
46
46
|
def initialize(options={})
|
47
|
-
|
47
|
+
options = JSON.parse(options.to_json)
|
48
|
+
@id = options["id"]
|
49
|
+
@title = options["title"]
|
50
|
+
@site_name = options["site_name"]
|
51
|
+
@site_name_cn = options["site_name_cn"]
|
52
|
+
@abstract = options["abstract"]
|
53
|
+
@content = options["content"]
|
54
|
+
@author_names = options["author_names"]
|
55
|
+
@state_info = options["state_info"]
|
56
|
+
@source = options["source"]
|
57
|
+
@files = options["files"]
|
58
|
+
@images = options["images"]
|
59
|
+
@videos = options["videos"]
|
60
|
+
@audios = options["audios"]
|
61
|
+
@links = options["links"]
|
62
|
+
@domain = options["domain"]
|
63
|
+
@keywords = options["keywords"]
|
64
|
+
@html_content = options["html_content"]
|
65
|
+
@lang = options["lang"]
|
66
|
+
@country_cn = options["country_cn"]
|
67
|
+
@country_code = options["country_code"]
|
68
|
+
@created_at = options["created_at"]
|
69
|
+
@updated_at = options["updated_at"]
|
70
|
+
@created_time = options["created_time"]
|
71
|
+
@oss_files = options["oss_files"]
|
72
|
+
@oss_images = options["oss_images"]
|
73
|
+
@customer_category = options["customer_category"]
|
74
|
+
@category = options["category"]
|
75
|
+
@topics = options["topics"]
|
76
|
+
@tags = options["tags"]
|
77
|
+
@views = options["views"]
|
78
|
+
@comments = options["comments"]
|
79
|
+
@reference = options["reference"]
|
80
|
+
@mention_country = options["mention_country"]
|
81
|
+
@authors = options["authors"]
|
82
|
+
@sub_title = options["sub_title"]
|
83
|
+
@timezone = options["timezone"]
|
84
|
+
@timezone_location = options["timezone_location"]
|
48
85
|
end
|
49
86
|
|
50
87
|
def as_json
|
@@ -43,7 +43,43 @@ class ThinktankReport < BaseModel
|
|
43
43
|
|
44
44
|
|
45
45
|
def initialize(options = {})
|
46
|
-
|
46
|
+
options = JSON.parse(options.to_json)
|
47
|
+
@id = options["id"]
|
48
|
+
@title = options["title"]
|
49
|
+
@site_name = options["site_name"]
|
50
|
+
@site_name_cn = options["site_name_cn"]
|
51
|
+
@abstract = options["abstract"]
|
52
|
+
@content = options["content"]
|
53
|
+
@author_names = options["author_names"]
|
54
|
+
@source = options["source"]
|
55
|
+
@files = options["files"]
|
56
|
+
@images = options["images"]
|
57
|
+
@videos = options["videos"]
|
58
|
+
@audios = options["audios"]
|
59
|
+
@links = options["links"]
|
60
|
+
@domain = options["domain"]
|
61
|
+
@keywords = options["keywords"]
|
62
|
+
@html_content = options["html_content"]
|
63
|
+
@lang = options["lang"]
|
64
|
+
@country_cn = options["country_cn"]
|
65
|
+
@country_code = options["country_code"]
|
66
|
+
@created_at = options["created_at"]
|
67
|
+
@updated_at = options["updated_at"]
|
68
|
+
@created_time = options["created_time"]
|
69
|
+
@oss_files = options["oss_files"]
|
70
|
+
@oss_images = options["oss_images"]
|
71
|
+
@customer_category = options["customer_category"]
|
72
|
+
@category = options["category"]
|
73
|
+
@topics = options["topics"]
|
74
|
+
@tags = options["tags"]
|
75
|
+
@views = options["views"]
|
76
|
+
@comments = options["comments"]
|
77
|
+
@reference = options["reference"]
|
78
|
+
@mention_country = options["mention_country"]
|
79
|
+
@authors = options["authors"]
|
80
|
+
@sub_title = options["sub_title"]
|
81
|
+
@timezone = options["timezone"]
|
82
|
+
@timezone_location = options["timezone_location"]
|
47
83
|
end
|
48
84
|
|
49
85
|
def to_json
|
data/lib/verify/verify.rb
CHANGED
@@ -3,6 +3,10 @@ class Verify
|
|
3
3
|
|
4
4
|
if table_name == "" && items.size > 0
|
5
5
|
table_name = switch_table(items[0])
|
6
|
+
elsif table_name != "" && items.size > 0
|
7
|
+
if !match_fields?(items[0],table_name)
|
8
|
+
return
|
9
|
+
end
|
6
10
|
end
|
7
11
|
|
8
12
|
if table_name == ""
|
@@ -50,4 +54,40 @@ class Verify
|
|
50
54
|
return table_name
|
51
55
|
end
|
52
56
|
|
57
|
+
def self.match_fields?(item={},table_name)
|
58
|
+
if table_name == nil || table_name == ""
|
59
|
+
return false
|
60
|
+
end
|
61
|
+
item_keys = JSON.parse(item.to_json).keys
|
62
|
+
v = $map_models[table_name]
|
63
|
+
|
64
|
+
if v == nil
|
65
|
+
puts "ERROR #{table_name} not Exist!"
|
66
|
+
return false
|
67
|
+
end
|
68
|
+
|
69
|
+
klass = Object.const_get v
|
70
|
+
klass_keys = klass.fields
|
71
|
+
|
72
|
+
# puts "----#{klass_keys}--"
|
73
|
+
sub_keys1 = item_keys - klass_keys
|
74
|
+
if sub_keys1.size > 0
|
75
|
+
puts "ERROR #{sub_keys1} do not belong #{table_name}"
|
76
|
+
return false
|
77
|
+
end
|
78
|
+
|
79
|
+
sub_keys2 = klass_keys - item_keys
|
80
|
+
if sub_keys2.size > 0
|
81
|
+
puts "ERROR #{sub_keys2} do not Exist!"
|
82
|
+
return false
|
83
|
+
end
|
84
|
+
|
85
|
+
return true
|
86
|
+
|
87
|
+
end
|
88
|
+
|
89
|
+
def self.table_names
|
90
|
+
return $map_models.keys
|
91
|
+
end
|
92
|
+
|
53
93
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crawlab_ruby_sdk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- min
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-07-
|
11
|
+
date: 2023-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: grpc
|