crawlab_ruby_sdk 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/crawlab_ruby_sdk/version.rb +1 -1
- data/lib/crawlab_ruby_sdk.rb +15 -11
- data/lib/models/base.rb +188 -0
- data/lib/models/thinktank_expert.rb +107 -0
- data/lib/models/thinktank_expert_report.rb +124 -0
- data/lib/models/thinktank_information.rb +126 -0
- data/lib/models/thinktank_report.rb +125 -0
- data/lib/verify/verify.rb +53 -0
- data/ruby_sdk_test.rb +6 -2
- metadata +8 -6
- data/lib/entity/result_pb.rb +0 -15
- data/lib/entity/stream_message_data_task_pb.rb +0 -58
- data/lib/models/node_pb.rb +0 -27
- data/lib/models/task_pb.rb +0 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1c85282caf96edf6a9100a70b045bb1004a471db9d2a7842f83e77a28fbde32e
|
4
|
+
data.tar.gz: cc64ce0004eeede08da51a5a7eec294f0db1b494d897079562a1578128960a1e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 012211e75c7de44e0493c931ce3d6d4cd8f1351da52ae4bd77da3aca95e39294d3c4c9e2c5cf2d5ebd877dcb1dbd2e45bc9f9b8ce2199ffdf3d71c3f6de517a4
|
7
|
+
data.tar.gz: 380d04dddd44c608e80eebc23f601f23acaacc08fe291e6a711749e65b00d7aef664a8d30a0a0537bfec82c4e02620da3dfdbcf259bd1bd6aecc2bebaaf821d2
|
data/lib/crawlab_ruby_sdk.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require "crawlab_ruby_sdk/version"
|
1
|
+
# require "crawlab_ruby_sdk/version"
|
2
2
|
require "grpc"
|
3
3
|
require "json"
|
4
4
|
def traverse_dir(file_path)
|
@@ -14,17 +14,18 @@ dir = File.expand_path("..", __FILE__)
|
|
14
14
|
# puts dir
|
15
15
|
|
16
16
|
traverse_dir(dir+'/entity/stream_message_code_pb.rb')
|
17
|
-
traverse_dir(dir+'/entity/result_pb.rb')
|
18
17
|
traverse_dir(dir+'/entity/stream_message_pb.rb')
|
19
|
-
traverse_dir(dir+'/entity/stream_message_data_task_pb.rb')
|
20
18
|
traverse_dir(dir+'/client')
|
19
|
+
traverse_dir(dir+'/models/base.rb')
|
20
|
+
traverse_dir(dir+'/models')
|
21
|
+
traverse_dir(dir+'/verify')
|
21
22
|
|
22
23
|
module CrawlabRubySdk
|
23
24
|
class Error < StandardError; end
|
24
25
|
# Your code goes here...
|
25
26
|
|
26
27
|
|
27
|
-
def self.save_item(item={})
|
28
|
+
def self.save_item(item={},table_name="")
|
28
29
|
address = ENV["CRAWLAB_GRPC_ADDRESS"]
|
29
30
|
if address==nil || address == ""
|
30
31
|
address = "localhost:9666"
|
@@ -34,6 +35,10 @@ module CrawlabRubySdk
|
|
34
35
|
if auth==nil || auth == ""
|
35
36
|
auth = "Crawlab2021!"
|
36
37
|
end
|
38
|
+
|
39
|
+
if !Verify.IsVerified?([item],table_name)
|
40
|
+
return
|
41
|
+
end
|
37
42
|
|
38
43
|
client = TaskServiceClient.new(address,auth)
|
39
44
|
|
@@ -42,7 +47,7 @@ module CrawlabRubySdk
|
|
42
47
|
save(sub_client,[item])
|
43
48
|
end
|
44
49
|
|
45
|
-
def self.save_items(items=[])
|
50
|
+
def self.save_items(items=[],table_name="")
|
46
51
|
address = ENV["CRAWLAB_GRPC_ADDRESS"]
|
47
52
|
if address==nil || address == ""
|
48
53
|
address = "localhost:9666"
|
@@ -52,6 +57,9 @@ module CrawlabRubySdk
|
|
52
57
|
if auth==nil || auth == ""
|
53
58
|
auth = "Crawlab2021!"
|
54
59
|
end
|
60
|
+
if !Verify.IsVerified?([item],table_name)
|
61
|
+
return
|
62
|
+
end
|
55
63
|
|
56
64
|
client = TaskServiceClient.new(address,auth)
|
57
65
|
|
@@ -90,15 +98,11 @@ module CrawlabRubySdk
|
|
90
98
|
end
|
91
99
|
|
92
100
|
data = {task_id: task_id,data:records}.to_json.b
|
93
|
-
# data = data.encode("utf-8")
|
94
|
-
# puts data
|
95
|
-
|
96
|
-
# data = data.encode('ASCII-8BIT', invalid: :replace, undef: :replace, replace: '')
|
97
|
-
# puts data
|
98
101
|
|
99
102
|
msg = Grpc::StreamMessage.new(code:3,data:data)
|
103
|
+
puts data
|
100
104
|
|
101
|
-
sub_client.Send([msg])
|
105
|
+
# sub_client.Send([msg])
|
102
106
|
end
|
103
107
|
|
104
108
|
def self.get_task_id
|
data/lib/models/base.rb
ADDED
@@ -0,0 +1,188 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
$map_models = {}
|
4
|
+
# Base class for record models that are validated before being saved.
#
# Subclasses override +table_name+, +verify_keys+ and +as_json+, and call
# +register+ so the class can be found by table name in +$map_models+.
class BaseModel
  # Copies every option whose key has a matching writer (for example one
  # declared with attr_accessor) onto the new instance. Keys may be strings
  # or symbols; unknown keys are ignored.
  #
  # @param options [Hash] attribute values keyed by attribute name
  def initialize(options = {})
    return unless options.respond_to?(:each_pair)

    options.each_pair do |key, value|
      writer = "#{key}="
      public_send(writer, value) if respond_to?(writer)
    end
  end

  # @return [String] key this model is registered under ("" by default)
  def self.table_name
    ""
  end

  # Stores this class's name in the global registry under +table_name+.
  def self.register
    $map_models ||= {} # tolerate being loaded before the registry is created
    $map_models[table_name] = name
  end

  # @return [Array<String>] the keys of a blank instance's +as_json+
  def self.fields
    JSON.parse(new({}).as_json.to_json).keys
  end

  # Validation rules: field name => list of rule strings. Supported rules are
  # "empty", "json", "string", "int", "regex:<pattern>", "length:<n>" and
  # "fields:<a,b,...>".
  #
  # @return [Hash{String => Array<String>}]
  def self.verify_keys
    {}
  end

  # @return [Hash] the record, keyed by symbol
  def as_json
    {}
  end

  # Runs every rule from +verify_keys+ against the matching +as_json+ value.
  # Prints a message for the first rule that fails.
  #
  # @return [Boolean] true when all rules pass, false on the first failure
  def verify
    record = as_json

    self.class.verify_keys.each do |key, rules|
      next unless rules.is_a?(Array)

      value = record[key.to_sym]
      rules.each do |rule|
        message = rule_failure(key, rule, value)
        next unless message

        puts message
        return false
      end
    end

    true
  end

  # @return [Boolean] true when +v+ is neither nil nor the empty string
  def verify_empty(v)
    !(v.nil? || v == "")
  end

  # @return [Boolean] true when +v+ parses as JSON; nil and non-strings fail
  def verify_json(v)
    JSON.parse(v)
    true
  rescue StandardError
    # Any parse failure simply means "not valid JSON" for this predicate.
    false
  end

  # @return [Boolean] true when +v+ is a String
  def verify_string(v)
    v.is_a?(String)
  end

  # @return [Boolean] true when +v+ is an Integer
  def verify_int(v)
    v.is_a?(Integer)
  end

  # Checks +v+ against the pattern in a "regex:<pattern>" rule.
  #
  # @param fn [String] the whole rule, e.g. 'regex:[\+|-]\d{4}'
  # @param v [Object] value under test
  # @return [Boolean] false for a malformed rule, a blank or non-text value,
  #   or when the pattern does not match anywhere in +v+
  def verify_regex(fn, v)
    return false unless fn.is_a?(String)

    # Limit the split so a pattern that itself contains ":" stays intact.
    _, pattern = fn.split(":", 2)
    return false if pattern.nil? || pattern.empty?
    return false if v.nil? || v == ""
    # Only text can match; this also avoids calling =~ on other objects.
    return false unless v.is_a?(String) || v.is_a?(Symbol)

    Regexp.new(pattern).match?(v)
  end

  # Checks that +v+, converted to a string, has exactly the size named in a
  # "length:<n>" rule.
  #
  # @param fn [String] the whole rule, e.g. "length:13"
  # @param v [Object] value under test
  # @return [Boolean] false for a malformed rule, a zero length, or nil value
  def verify_length(fn, v)
    return false unless fn.is_a?(String)

    _, raw_length = fn.split(":", 2)
    return false if raw_length.nil?

    length = raw_length.to_i
    return false if length == 0
    return false if v.nil?

    v.to_s.size == length
  end

  # Checks that +v+ is a JSON array of objects and that every object has a
  # non-null value for each field named in a "fields:<a,b,...>" rule.
  #
  # @param fn [String] the whole rule, e.g. "fields:author_id,author_name"
  # @param v [String] JSON text under test
  # @return [Boolean] false for a malformed rule, unparsable JSON, a JSON
  #   value that is not an array of objects, or a missing field
  def verify_fields(fn, v)
    return false unless fn.is_a?(String)

    _, field_list = fn.split(":", 2)
    return false if field_list.nil? || field_list.empty?

    fields = field_list.split(",")

    begin
      rows = JSON.parse(v)
    rescue StandardError
      return false
    end
    return false unless rows.is_a?(Array)

    rows.all? do |row|
      row.is_a?(Hash) && fields.none? { |field| row[field].nil? }
    end
  end

  private

  # Applies one rule to one value.
  #
  # Parameterised rules are recognised by prefix, so a rule such as
  # "fields:length,width" is not mistaken for a length rule. Unknown rules
  # are ignored.
  #
  # @return [String, nil] the message to print, or nil when the rule passes
  def rule_failure(key, rule, value)
    rule = rule.to_s

    if rule == "empty"
      "ERROR: #{key} cannot be empty!" unless verify_empty(value)
    elsif rule == "json"
      "ERROR: #{key} json string parse fail!" unless verify_json(value)
    elsif rule == "string"
      "ERROR: #{key} field type is not string!" unless verify_string(value)
    elsif rule == "int"
      "ERROR: #{key} field type is not int!" unless verify_int(value)
    elsif rule.start_with?("regex")
      "ERROR: #{key} regex #{rule} match error" unless verify_regex(rule, value)
    elsif rule.start_with?("length")
      "ERROR: #{key} length must be #{rule}" unless verify_length(rule, value)
    elsif rule.start_with?("fields")
      "ERROR: #{key}:#{rule} not Exist!" unless verify_fields(rule, value)
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# Model registered under the table name "thinktank_experts".
#
# FIELDS is the single list behind the accessors, the constructor and
# +as_json+, so the three cannot drift apart.
class ThinktankExpert < BaseModel
  FIELDS = %i[
    id name title content location area_of_expertise profile_images phone
    email link audios videos education related_topics site_name site_name_cn
    domain created_at updated_at source oss_profile_images facebook twitter
    linkedin instagram wikidata person_type files oss_files
    associated_program lang website nationalities
  ].freeze

  attr_accessor(*FIELDS)

  # @return [String] registry key for this model
  def self.table_name
    "thinktank_experts"
  end
  register

  # Populates the attributes from +options+. Keys may be strings or symbols;
  # keys without a matching accessor are ignored.
  #
  # @param options [Hash] attribute values keyed by field name
  def initialize(options = {})
    return unless options.respond_to?(:each_pair)

    options.each_pair do |key, value|
      setter = "#{key}="
      public_send(setter, value) if respond_to?(setter)
    end
  end

  # @return [Hash{Symbol => Object}] every field, in FIELDS order
  def as_json
    FIELDS.each_with_object({}) do |field, record|
      record[field] = instance_variable_get("@#{field}")
    end
  end

  # Accepts the optional generator arguments so an instance can also be
  # serialised when nested inside a Hash or Array.
  #
  # @return [String] JSON text of +as_json+
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{String => Array<String>}] validation rules per field
  def self.verify_keys
    {
      "id" => ["empty", "string"],
      "title" => ["json", "string"],
      "name" => ["empty", "string"],
      "site_name_cn" => ["empty", "string"],
      "site_name" => ["empty", "string"],
      "source" => ["empty", "string"],
      "audios" => ["json", "string"],
      "videos" => ["json", "string"],
      "related_topics" => ["json", "string"],
      "files" => ["json", "string"],
      "oss_files" => ["json", "string"],
      "domain" => ["empty", "string"],
      "created_at" => ["empty", "int", "length:13"],
      "updated_at" => ["empty", "int", "length:13"]
    }
  end
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
# Model registered under the table name "thinktank_expert_reports".
#
# FIELDS is the single list behind the accessors, the constructor and
# +as_json+, so the three cannot drift apart.
class ThinktankExpertReport < BaseModel
  FIELDS = %i[
    id title site_name site_name_cn abstract content author_names source
    files images videos audios links domain keywords html_content lang
    country_cn country_code created_at updated_at created_time oss_files
    oss_images customer_category category topics tags views comments
    reference mention_country authors sub_title timezone timezone_location
  ].freeze

  attr_accessor(*FIELDS)

  # @return [String] registry key for this model
  def self.table_name
    "thinktank_expert_reports"
  end
  register

  # Populates the attributes from +options+. Keys may be strings or symbols;
  # keys without a matching accessor are ignored.
  #
  # @param options [Hash] attribute values keyed by field name
  def initialize(options = {})
    return unless options.respond_to?(:each_pair)

    options.each_pair do |key, value|
      setter = "#{key}="
      public_send(setter, value) if respond_to?(setter)
    end
  end

  # @return [Hash{Symbol => Object}] every field, in FIELDS order
  def as_json
    FIELDS.each_with_object({}) do |field, record|
      record[field] = instance_variable_get("@#{field}")
    end
  end

  # Accepts the optional generator arguments so an instance can also be
  # serialised when nested inside a Hash or Array.
  #
  # @return [String] JSON text of +as_json+
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{String => Array<String>}] validation rules per field
  def self.verify_keys
    {
      "id" => ["empty", "string"],
      "title" => ["empty", "string"],
      "site_name" => ["empty", "string"],
      "site_name_cn" => ["empty", "string"],
      "content" => ["empty", "string"],
      "source" => ["empty", "string"],
      "files" => ["json", "string"],
      "images" => ["json", "string"],
      "videos" => ["json", "string"],
      "audios" => ["json", "string"],
      "links" => ["json", "string"],
      "domain" => ["empty", "string"],
      "keywords" => ["json", "string"],
      "lang" => ["empty", "string"],
      "country_cn" => ["empty", "string"],
      "country_code" => ["empty", "string"],
      "created_at" => ["empty", "int", "length:13"],
      "updated_at" => ["empty", "int", "length:13"],
      "created_time" => ["empty", "int", "length:10"],
      "oss_files" => ["json", "string"],
      "oss_images" => ["json", "string"],
      "topics" => ["json", "string"],
      "tags" => ["string", "json"],
      "authors" => ["json", "fields:author_id,author_name,author_url", "string"],
      "timezone" => ["empty", 'regex:[\+|-]\d{4}', "string"],
      "timezone_location" => ["empty", "string"]
    }
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# Model registered under the table name "thinktank_informations".
#
# FIELDS is the single list behind the accessors, the constructor and
# +as_json+. It includes +audios+, which has an accessor and a validation
# rule and therefore must also appear in +as_json+.
class ThinktankInformation < BaseModel
  FIELDS = %i[
    id title site_name site_name_cn abstract content author_names state_info
    source files images videos audios links domain keywords html_content
    lang country_cn country_code created_at updated_at created_time
    oss_files oss_images customer_category category topics tags views
    comments reference mention_country authors sub_title timezone
    timezone_location
  ].freeze

  attr_accessor(*FIELDS)

  # @return [String] registry key for this model
  def self.table_name
    "thinktank_informations"
  end
  register

  # Populates the attributes from +options+. Keys may be strings or symbols;
  # keys without a matching accessor are ignored.
  #
  # @param options [Hash] attribute values keyed by field name
  def initialize(options = {})
    return unless options.respond_to?(:each_pair)

    options.each_pair do |key, value|
      setter = "#{key}="
      public_send(setter, value) if respond_to?(setter)
    end
  end

  # @return [Hash{Symbol => Object}] every field, in FIELDS order
  def as_json
    FIELDS.each_with_object({}) do |field, record|
      record[field] = instance_variable_get("@#{field}")
    end
  end

  # Accepts the optional generator arguments so an instance can also be
  # serialised when nested inside a Hash or Array.
  #
  # @return [String] JSON text of +as_json+
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{String => Array<String>}] validation rules per field
  def self.verify_keys
    {
      "id" => ["empty", "string"],
      "title" => ["empty", "string"],
      "site_name" => ["empty", "string"],
      "site_name_cn" => ["empty", "string"],
      "content" => ["empty", "string"],
      "source" => ["empty", "string"],
      "files" => ["json", "string"],
      "images" => ["json", "string"],
      "videos" => ["json", "string"],
      "audios" => ["json", "string"],
      "links" => ["json", "string"],
      "domain" => ["empty", "string"],
      "keywords" => ["json", "string"],
      "lang" => ["empty", "string"],
      "country_cn" => ["empty", "string"],
      "country_code" => ["empty", "string"],
      "created_at" => ["empty", "int", "length:13"],
      "updated_at" => ["empty", "int", "length:13"],
      "created_time" => ["empty", "int", "length:10"],
      "oss_files" => ["json", "string"],
      "oss_images" => ["json", "string"],
      "topics" => ["json", "string"],
      "tags" => ["string", "json"],
      "authors" => ["json", "fields:author_id,author_name,author_url", "string"],
      "timezone" => ["empty", 'regex:[\+|-]\d{4}', "string"],
      "timezone_location" => ["empty", "string"]
    }
  end
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# Model registered under the table name "thinktank_reports".
#
# FIELDS is the single list behind the accessors, the constructor and
# +as_json+, so the three cannot drift apart.
class ThinktankReport < BaseModel
  FIELDS = %i[
    id title site_name site_name_cn abstract content author_names source
    files images videos audios links domain keywords html_content lang
    country_cn country_code created_at updated_at created_time oss_files
    oss_images customer_category category topics tags views comments
    reference mention_country authors sub_title timezone timezone_location
  ].freeze

  attr_accessor(*FIELDS)

  # @return [String] registry key for this model
  def self.table_name
    "thinktank_reports"
  end
  register

  # Populates the attributes from +options+. Keys may be strings or symbols;
  # keys without a matching accessor are ignored.
  #
  # @param options [Hash] attribute values keyed by field name
  def initialize(options = {})
    return unless options.respond_to?(:each_pair)

    options.each_pair do |key, value|
      setter = "#{key}="
      public_send(setter, value) if respond_to?(setter)
    end
  end

  # Accepts the optional generator arguments so an instance can also be
  # serialised when nested inside a Hash or Array.
  #
  # @return [String] JSON text of +as_json+
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{Symbol => Object}] every field, in FIELDS order
  def as_json
    FIELDS.each_with_object({}) do |field, record|
      record[field] = instance_variable_get("@#{field}")
    end
  end

  # @return [Hash{String => Array<String>}] validation rules per field
  def self.verify_keys
    {
      "id" => ["empty", "string"],
      "title" => ["empty", "string"],
      "site_name" => ["empty", "string"],
      "site_name_cn" => ["empty", "string"],
      "content" => ["empty", "string"],
      "source" => ["empty", "string"],
      "files" => ["json", "string"],
      "images" => ["json", "string"],
      "videos" => ["json", "string"],
      "audios" => ["json", "string"],
      "links" => ["json", "string"],
      "domain" => ["empty", "string"],
      "keywords" => ["json", "string"],
      "lang" => ["empty", "string"],
      "country_cn" => ["empty", "string"],
      "country_code" => ["empty", "string"],
      "created_at" => ["empty", "int", "length:13"],
      "updated_at" => ["empty", "int", "length:13"],
      "created_time" => ["empty", "int", "length:10"],
      "oss_files" => ["json", "string"],
      "oss_images" => ["json", "string"],
      "topics" => ["json", "string"],
      "tags" => ["string", "json"],
      "authors" => ["json", "fields:author_id,author_name,author_url", "string"],
      "timezone" => ["empty", 'regex:[\+|-]\d{4}', "string"],
      "timezone_location" => ["empty", "string"]
    }
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# Validates items against the model class registered for a table in the
# global +$map_models+ registry (table name => class name).
class Verify
  # Validates every item against the model registered for +table_name+.
  #
  # When +table_name+ is "" it is guessed from the keys of the first item
  # (see +switch_table+). Nothing is validated, and true is returned, when no
  # table matches or when no model is registered for the table.
  #
  # @param items [Array] records to validate
  # @param table_name [String] registry key, or "" to auto-detect
  # @return [Boolean] false as soon as one item fails its model's +verify+
  def self.IsVerified?(items = [], table_name = "")
    table_name = switch_table(items[0]) if table_name == "" && items.size > 0

    if table_name == ""
      puts "未匹配,不验证"
      return true
    end
    puts "验证:#{table_name}"

    # Resolve the class name first: Object.const_get(nil) raises TypeError,
    # so an unregistered table has to be caught before the constant lookup.
    class_name = ($map_models || {})[table_name]
    if class_name.nil?
      puts "ERROR #{table_name} 验证 not Exist!"
      return true
    end

    model = Object.const_get(class_name)
    items.all? { |item| model.new(item).verify }
  end

  # Finds the registered table whose model has exactly the same field names
  # as +item+. The first match in registry order wins, so models that share
  # an identical field list cannot be told apart here.
  #
  # @param item [Hash] record whose keys are compared (as strings)
  # @return [String] the matching table name, or "" when none matches
  def self.switch_table(item = {})
    # Round-trip through JSON so symbol and string keys compare the same way.
    item_keys = JSON.parse(item.to_json).keys

    ($map_models || {}).each do |table, class_name|
      model_keys = Object.const_get(class_name).fields
      same_keys = item_keys.size == model_keys.size && (item_keys - model_keys).empty?
      return table if same_keys
    end

    ""
  end
end
|
data/ruby_sdk_test.rb
CHANGED
@@ -10,6 +10,11 @@ def main
|
|
10
10
|
|
11
11
|
CrawlabRubySdk.save_item(a)
|
12
12
|
|
13
|
+
# CrawlabRubySdk.save_item(a,"thinktank_experts")
|
14
|
+
# CrawlabRubySdk.save_item(a,"thinktank_expert_reports")
|
15
|
+
# CrawlabRubySdk.save_item(a,"thinktank_informations")
|
16
|
+
# CrawlabRubySdk.save_item(a,"thinktank_reports")
|
17
|
+
|
13
18
|
item = {
|
14
19
|
"id": "dec9d5415409cc9275f5590c145c3ccf",
|
15
20
|
"title": "Association of Selected State Policies and Requirements for Buprenorphine Treatment With Per Capita Months of Treatment",
|
@@ -50,7 +55,6 @@ def main
|
|
50
55
|
}
|
51
56
|
CrawlabRubySdk.save_item(item)
|
52
57
|
|
53
|
-
|
54
58
|
end
|
55
59
|
|
56
|
-
main
|
60
|
+
# main
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crawlab_ruby_sdk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- min
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: grpc
|
@@ -90,12 +90,14 @@ files:
|
|
90
90
|
- lib/entity/request_pb.rb
|
91
91
|
- lib/entity/response_code_pb.rb
|
92
92
|
- lib/entity/response_pb.rb
|
93
|
-
- lib/entity/result_pb.rb
|
94
93
|
- lib/entity/stream_message_code_pb.rb
|
95
|
-
- lib/entity/stream_message_data_task_pb.rb
|
96
94
|
- lib/entity/stream_message_pb.rb
|
97
|
-
- lib/models/node_pb.rb
|
98
|
-
- lib/models/task_pb.rb
|
95
|
+
- lib/models/base.rb
|
96
|
+
- lib/models/thinktank_expert.rb
|
97
|
+
- lib/models/thinktank_expert_report.rb
|
98
|
+
- lib/models/thinktank_information.rb
|
99
|
+
- lib/models/thinktank_report.rb
|
100
|
+
- lib/verify/verify.rb
|
99
101
|
- ruby_sdk_test.rb
|
100
102
|
homepage: https://github.com/rich-bro/crawlab_ruby_sdk
|
101
103
|
licenses: []
|
data/lib/entity/result_pb.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
require 'google/protobuf'
|
2
|
-
|
3
|
-
Google::Protobuf::DescriptorPool.generated_pool.build do
|
4
|
-
add_file("entity/result.proto", :syntax => :proto3) do
|
5
|
-
add_message "grpc.Result" do
|
6
|
-
optional :_tid, :string, 1
|
7
|
-
optional :name, :string, 2
|
8
|
-
optional :age, :string, 3
|
9
|
-
end
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
module Grpc
|
14
|
-
Result = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Result").msgclass
|
15
|
-
end
|
@@ -1,58 +0,0 @@
|
|
1
|
-
# # Generated by the protocol buffer compiler. DO NOT EDIT!
|
2
|
-
# # source: entity/stream_message_data_task.proto
|
3
|
-
|
4
|
-
# require 'google/protobuf'
|
5
|
-
|
6
|
-
# Google::Protobuf::DescriptorPool.generated_pool.build do
|
7
|
-
# add_file(PWD + "/proto/entity/stream_message_data_task.proto", :syntax => :proto3) do
|
8
|
-
# add_message "grpc.StreamMessageDataTask" do
|
9
|
-
# optional :task_id, :string, 1
|
10
|
-
# optional :data, :string,:repeated, 2
|
11
|
-
# end
|
12
|
-
# end
|
13
|
-
# end
|
14
|
-
|
15
|
-
# module Grpc
|
16
|
-
# StreamMessageDataTask = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.StreamMessageDataTask").msgclass
|
17
|
-
# end
|
18
|
-
|
19
|
-
|
20
|
-
# frozen_string_literal: true
|
21
|
-
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
22
|
-
# source: entity/stream_message_data_task.proto
|
23
|
-
|
24
|
-
require 'google/protobuf'
|
25
|
-
|
26
|
-
# require 'entity/result_pb'
|
27
|
-
|
28
|
-
|
29
|
-
descriptor_data = "\n%entity/stream_message_data_task.proto\x12\x04grpc\x1a\x13\x65ntity/result.proto\"D\n\x15StreamMessageDataTask\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x1a\n\x04\x64\x61ta\x18\x02 \x03(\x0b\x32\x0c.grpc.ResultB\x08Z\x06.;grpcb\x06proto3"
|
30
|
-
|
31
|
-
pool = Google::Protobuf::DescriptorPool.generated_pool
|
32
|
-
|
33
|
-
begin
|
34
|
-
pool.add_serialized_file(descriptor_data)
|
35
|
-
rescue TypeError => e
|
36
|
-
# Compatibility code: will be removed in the next major version.
|
37
|
-
require 'google/protobuf/descriptor_pb'
|
38
|
-
parsed = Google::Protobuf::FileDescriptorProto.decode(descriptor_data)
|
39
|
-
parsed.clear_dependency
|
40
|
-
serialized = parsed.class.encode(parsed)
|
41
|
-
file = pool.add_serialized_file(serialized)
|
42
|
-
warn "Warning: Protobuf detected an import path issue while loading generated file #{__FILE__}"
|
43
|
-
imports = [
|
44
|
-
["grpc.Result", "entity/result.proto"],
|
45
|
-
]
|
46
|
-
imports.each do |type_name, expected_filename|
|
47
|
-
import_file = pool.lookup(type_name).file_descriptor
|
48
|
-
if import_file.name != expected_filename
|
49
|
-
warn "- #{file.name} imports #{expected_filename}, but that import was loaded as #{import_file.name}"
|
50
|
-
end
|
51
|
-
end
|
52
|
-
warn "Each proto file must use a consistent fully-qualified name."
|
53
|
-
warn "This will become an error in the next major version."
|
54
|
-
end
|
55
|
-
|
56
|
-
module Grpc
|
57
|
-
StreamMessageDataTask = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.StreamMessageDataTask").msgclass
|
58
|
-
end
|
data/lib/models/node_pb.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
2
|
-
# source: models/node.proto
|
3
|
-
|
4
|
-
require 'google/protobuf'
|
5
|
-
|
6
|
-
Google::Protobuf::DescriptorPool.generated_pool.build do
|
7
|
-
add_file("proto/models/node.proto", :syntax => :proto3) do
|
8
|
-
add_message "grpc.Node" do
|
9
|
-
optional :_id, :string, 1
|
10
|
-
optional :name, :string, 2
|
11
|
-
optional :ip, :string, 3
|
12
|
-
optional :port, :string, 5
|
13
|
-
optional :mac, :string, 6
|
14
|
-
optional :hostname, :string, 7
|
15
|
-
optional :description, :string, 8
|
16
|
-
optional :key, :string, 9
|
17
|
-
optional :is_master, :bool, 11
|
18
|
-
optional :update_ts, :string, 12
|
19
|
-
optional :create_ts, :string, 13
|
20
|
-
optional :update_ts_unix, :int64, 14
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
module Grpc
|
26
|
-
Node = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Node").msgclass
|
27
|
-
end
|
data/lib/models/task_pb.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
2
|
-
# source: models/task.proto
|
3
|
-
|
4
|
-
require 'google/protobuf'
|
5
|
-
|
6
|
-
Google::Protobuf::DescriptorPool.generated_pool.build do
|
7
|
-
add_file("models/task.proto", :syntax => :proto3) do
|
8
|
-
add_message "grpc.Task" do
|
9
|
-
optional :_id, :string, 1
|
10
|
-
optional :spider_id, :string, 2
|
11
|
-
optional :status, :string, 5
|
12
|
-
optional :node_id, :string, 6
|
13
|
-
optional :cmd, :string, 8
|
14
|
-
optional :param, :string, 9
|
15
|
-
optional :error, :string, 10
|
16
|
-
optional :pid, :int32, 16
|
17
|
-
optional :run_type, :string, 17
|
18
|
-
optional :schedule_id, :string, 18
|
19
|
-
optional :type, :string, 19
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
module Grpc
|
25
|
-
Task = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Task").msgclass
|
26
|
-
end
|