crawlab_ruby_sdk 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3fc9c6d1277660d63c17811f472dd56d438a4998c6d0c03e99d1a9e5c0357868
4
- data.tar.gz: 20b8ffa1ba0b1275c66f1a79cc80a479d39211b781174107a4cc21bf8ca86fb1
3
+ metadata.gz: 1c85282caf96edf6a9100a70b045bb1004a471db9d2a7842f83e77a28fbde32e
4
+ data.tar.gz: cc64ce0004eeede08da51a5a7eec294f0db1b494d897079562a1578128960a1e
5
5
  SHA512:
6
- metadata.gz: 5b925b182c7fa7fcbd8d7c9d3a2883e8acee1fa41eafc01a28aaa1ba78c037f8d17f1fed831846001b1761b1480854e908732ac5e8e3b500828267baebc1721c
7
- data.tar.gz: ae63a9d061c104cf8ad8c3e9c4d72b1a70f9f21dbe4c7b07d299ee5cfafe048a008f16ee26ed94efda8b64542aed817f33c2dbdd0323cab28d0c2ce1d8cd67ea
6
+ metadata.gz: 012211e75c7de44e0493c931ce3d6d4cd8f1351da52ae4bd77da3aca95e39294d3c4c9e2c5cf2d5ebd877dcb1dbd2e45bc9f9b8ce2199ffdf3d71c3f6de517a4
7
+ data.tar.gz: 380d04dddd44c608e80eebc23f601f23acaacc08fe291e6a711749e65b00d7aef664a8d30a0a0537bfec82c4e02620da3dfdbcf259bd1bd6aecc2bebaaf821d2
@@ -1,3 +1,3 @@
1
1
  module CrawlabRubySdk
2
- VERSION = "0.1.6"
2
+ VERSION = "0.1.7"
3
3
  end
@@ -1,4 +1,4 @@
1
- require "crawlab_ruby_sdk/version"
1
+ # require "crawlab_ruby_sdk/version"
2
2
  require "grpc"
3
3
  require "json"
4
4
  def traverse_dir(file_path)
@@ -14,17 +14,18 @@ dir = File.expand_path("..", __FILE__)
14
14
  # puts dir
15
15
 
16
16
  traverse_dir(dir+'/entity/stream_message_code_pb.rb')
17
- traverse_dir(dir+'/entity/result_pb.rb')
18
17
  traverse_dir(dir+'/entity/stream_message_pb.rb')
19
- traverse_dir(dir+'/entity/stream_message_data_task_pb.rb')
20
18
  traverse_dir(dir+'/client')
19
+ traverse_dir(dir+'/models/base.rb')
20
+ traverse_dir(dir+'/models')
21
+ traverse_dir(dir+'/verify')
21
22
 
22
23
  module CrawlabRubySdk
23
24
  class Error < StandardError; end
24
25
  # Your code goes here...
25
26
 
26
27
 
27
- def self.save_item(item={})
28
+ def self.save_item(item={},table_name="")
28
29
  address = ENV["CRAWLAB_GRPC_ADDRESS"]
29
30
  if address==nil || address == ""
30
31
  address = "localhost:9666"
@@ -34,6 +35,10 @@ module CrawlabRubySdk
34
35
  if auth==nil || auth == ""
35
36
  auth = "Crawlab2021!"
36
37
  end
38
+
39
+ if !Verify.IsVerified?([item],table_name)
40
+ return
41
+ end
37
42
 
38
43
  client = TaskServiceClient.new(address,auth)
39
44
 
@@ -42,7 +47,7 @@ module CrawlabRubySdk
42
47
  save(sub_client,[item])
43
48
  end
44
49
 
45
- def self.save_items(items=[])
50
+ def self.save_items(items=[],table_name="")
46
51
  address = ENV["CRAWLAB_GRPC_ADDRESS"]
47
52
  if address==nil || address == ""
48
53
  address = "localhost:9666"
@@ -52,6 +57,9 @@ module CrawlabRubySdk
52
57
  if auth==nil || auth == ""
53
58
  auth = "Crawlab2021!"
54
59
  end
60
+ if !Verify.IsVerified?([item],table_name)
61
+ return
62
+ end
55
63
 
56
64
  client = TaskServiceClient.new(address,auth)
57
65
 
@@ -90,15 +98,11 @@ module CrawlabRubySdk
90
98
  end
91
99
 
92
100
  data = {task_id: task_id,data:records}.to_json.b
93
- # data = data.encode("utf-8")
94
- # puts data
95
-
96
- # data = data.encode('ASCII-8BIT', invalid: :replace, undef: :replace, replace: '')
97
- # puts data
98
101
 
99
102
  msg = Grpc::StreamMessage.new(code:3,data:data)
103
+ puts data
100
104
 
101
- sub_client.Send([msg])
105
+ # sub_client.Send([msg])
102
106
  end
103
107
 
104
108
  def self.get_task_id
@@ -0,0 +1,188 @@
1
+ require 'json'
2
+
3
+ $map_models = {}
4
+ class BaseModel
5
+ def self.table_name
6
+ return ""
7
+ end
8
+
9
+ def self.register
10
+ $map_models[table_name] = self.name
11
+ end
12
+
13
+ def self.fields
14
+ JSON.parse(self.new({}).as_json.to_json).keys
15
+ end
16
+
17
+ def self.verify_keys
18
+ return {}
19
+ end
20
+
21
+ def as_json
22
+ {}
23
+ end
24
+
25
+ def verify
26
+ json_data = as_json
27
+ verify_keys = self.class.verify_keys
28
+
29
+ verify_keys.each do |key,fns|
30
+ value = json_data[key.to_sym]
31
+ if fns.class.name == "Array"
32
+ fns.each do |fn|
33
+ if fn == "empty"
34
+ if !verify_empty(value)
35
+ puts "ERROR: #{key} cannot be empty!"
36
+ return false
37
+ end
38
+ elsif fn == "json"
39
+ if !verify_json(value)
40
+ puts "ERROR: #{key} json string parse fail!"
41
+ return false
42
+ end
43
+ elsif fn == "string"
44
+ if !verify_string(value)
45
+ puts "ERROR: #{key} field type is not string!"
46
+ return false
47
+ end
48
+ elsif fn == "int"
49
+ if !verify_int(value)
50
+ puts "ERROR: #{key} field type is not int!"
51
+ return false
52
+ end
53
+ elsif fn.include?("regex")
54
+ if !verify_regex(fn,value)
55
+ puts "ERROR: #{key} regex #{fn} match error"
56
+ return false
57
+ end
58
+ elsif fn.include?("length")
59
+ if !verify_length(fn,value)
60
+ puts "ERROR: #{key} length must be #{fn}"
61
+ return false
62
+ end
63
+ elsif fn.include?("fields")
64
+ if !verify_fields(fn,value)
65
+ puts "ERROR: #{key}:#{fn} not Exist!"
66
+ return false
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
73
+
74
+ def verify_empty(v)
75
+ if v == nil || v == ""
76
+ return false
77
+ end
78
+ return true
79
+ end
80
+
81
+ def verify_json(v)
82
+ begin
83
+ JSON.parse(v)
84
+ return true
85
+ rescue StandardError => e
86
+ return false
87
+ end
88
+ end
89
+
90
+ def verify_string(v)
91
+ if v == nil
92
+ return false
93
+ end
94
+ if v.class.name == "String"
95
+ return true
96
+ end
97
+ return false
98
+ end
99
+
100
+ def verify_int(v)
101
+ if v == nil
102
+ return false
103
+ end
104
+
105
+ if v.class.name == "Integer"
106
+ return true
107
+ end
108
+ return false
109
+ end
110
+
111
+ def verify_regex(fn,v)
112
+ if fn.class.name != "String"
113
+ return false
114
+ end
115
+ regex_arr = fn.split(":")
116
+ if regex_arr.size < 2
117
+ return false
118
+ end
119
+ regex_str = regex_arr[1]
120
+ if regex_str.size == 0
121
+ return false
122
+ end
123
+
124
+ if v == nil || v == ""
125
+ return false
126
+ end
127
+
128
+ if v =~ Regexp.new(regex_str)
129
+ return true
130
+ else
131
+ return false
132
+ end
133
+ end
134
+
135
+ def verify_length(fn,v)
136
+ if fn.class.name != "String"
137
+ return false
138
+ end
139
+ length_arr = fn.split(":")
140
+ if length_arr.size < 2
141
+ return false
142
+ end
143
+ length = length_arr[1].to_i
144
+ if length == 0
145
+ return false
146
+ end
147
+
148
+ if v == nil
149
+ return false
150
+ end
151
+
152
+ if v.to_s.size == length
153
+ return true
154
+ end
155
+
156
+ return false
157
+ end
158
+
159
+ def verify_fields(fn,v)
160
+ if fn.class.name != "String"
161
+ return false
162
+ end
163
+ fields_arr = fn.split(":")
164
+ if length_arr.size < 2
165
+ return false
166
+ end
167
+ fields_str = fields_arr[1]
168
+
169
+ fields = fields_str.split(",")
170
+
171
+ begin
172
+ datas = json.parse(v)
173
+ datas.each do |data|
174
+ fields.each do |field|
175
+ if data[field] == nil
176
+ return false
177
+ end
178
+ end
179
+ end
180
+ rescue StandardError => e
181
+ return false
182
+ end
183
+
184
+ return true
185
+ end
186
+
187
+
188
+ end
@@ -0,0 +1,107 @@
1
+ class ThinktankExpert < BaseModel
2
+ attr_accessor :id
3
+ attr_accessor :name
4
+ attr_accessor :title
5
+ attr_accessor :content
6
+ attr_accessor :location
7
+ attr_accessor :area_of_expertise
8
+ attr_accessor :profile_images
9
+ attr_accessor :phone
10
+ attr_accessor :email
11
+ attr_accessor :link
12
+ attr_accessor :audios
13
+ attr_accessor :videos
14
+ attr_accessor :education
15
+ attr_accessor :related_topics
16
+ attr_accessor :site_name
17
+ attr_accessor :site_name_cn
18
+ attr_accessor :domain
19
+ attr_accessor :created_at
20
+ attr_accessor :updated_at
21
+ attr_accessor :source
22
+ attr_accessor :oss_profile_images
23
+ attr_accessor :facebook
24
+ attr_accessor :twitter
25
+ attr_accessor :linkedin
26
+ attr_accessor :instagram
27
+ attr_accessor :wikidata
28
+ attr_accessor :person_type
29
+ attr_accessor :files
30
+ attr_accessor :oss_files
31
+ attr_accessor :associated_program
32
+ attr_accessor :lang
33
+ attr_accessor :website
34
+ attr_accessor :nationalities
35
+
36
+
37
+ def self.table_name
38
+ return "thinktank_experts"
39
+ end
40
+ register
41
+
42
+
43
+ def initialize(options={})
44
+
45
+ end
46
+
47
+ def as_json
48
+ return {
49
+ id: @id,
50
+ name: @name,
51
+ title: @title,
52
+ content: @content,
53
+ location: @location,
54
+ area_of_expertise: @area_of_expertise,
55
+ profile_images: @profile_images,
56
+ phone: @phone,
57
+ email: @email,
58
+ link: @link,
59
+ audios: @audios,
60
+ videos: @videos,
61
+ education: @education,
62
+ related_topics: @related_topics,
63
+ site_name: @site_name,
64
+ site_name_cn: @site_name_cn,
65
+ domain: @domain,
66
+ created_at: @created_at,
67
+ updated_at: @updated_at,
68
+ source: @source,
69
+ oss_profile_images: @oss_profile_images,
70
+ facebook: @facebook,
71
+ twitter: @twitter,
72
+ linkedin: @linkedin,
73
+ instagram: @instagram,
74
+ wikidata: @wikidata,
75
+ person_type: @person_type,
76
+ files: @files,
77
+ oss_files: @oss_files,
78
+ associated_program: @associated_program,
79
+ lang: @lang,
80
+ website: @website,
81
+ nationalities: @nationalities,
82
+ }
83
+ end
84
+
85
+ def to_json
86
+ return as_json.to_json
87
+ end
88
+
89
+ def self.verify_keys
90
+ return {
91
+ "id"=> ["empty", "string"],
92
+ "title"=> ["json", "string"],
93
+ "name"=> ["empty", "string"],
94
+ "site_name_cn"=>["empty", "string"],
95
+ "site_name"=> ["empty", "string"],
96
+ "source"=> ["empty", "string"],
97
+ "audios"=> ["json", "string"],
98
+ "videos"=> ["json", "string"],
99
+ "related_topics"=>["json", "string"],
100
+ "files"=> ["json", "string"],
101
+ "oss_files"=> ["json", "string"],
102
+ "domain"=> ["empty", "string"],
103
+ "created_at"=> ["empty", "int", "length:13"],
104
+ "updated_at"=> ["empty", "int", "length:13"],
105
+ }
106
+ end
107
+ end
@@ -0,0 +1,124 @@
1
+ class ThinktankExpertReport < BaseModel
2
+ attr_accessor :id
3
+ attr_accessor :title
4
+ attr_accessor :site_name
5
+ attr_accessor :site_name_cn
6
+ attr_accessor :abstract
7
+ attr_accessor :content
8
+ attr_accessor :author_names
9
+ attr_accessor :source
10
+ attr_accessor :files
11
+ attr_accessor :images
12
+ attr_accessor :videos
13
+ attr_accessor :audios
14
+ attr_accessor :links
15
+ attr_accessor :domain
16
+ attr_accessor :keywords
17
+ attr_accessor :html_content
18
+ attr_accessor :lang
19
+ attr_accessor :country_cn
20
+ attr_accessor :country_code
21
+ attr_accessor :created_at
22
+ attr_accessor :updated_at
23
+ attr_accessor :created_time
24
+ attr_accessor :oss_files
25
+ attr_accessor :oss_images
26
+ attr_accessor :customer_category
27
+ attr_accessor :category
28
+ attr_accessor :topics
29
+ attr_accessor :tags
30
+ attr_accessor :views
31
+ attr_accessor :comments
32
+ attr_accessor :reference
33
+ attr_accessor :mention_country
34
+ attr_accessor :authors
35
+ attr_accessor :sub_title
36
+ attr_accessor :timezone
37
+ attr_accessor :timezone_location
38
+
39
+ def self.table_name
40
+ return "thinktank_expert_reports"
41
+ end
42
+ register
43
+
44
+ def initialize(options={})
45
+
46
+ end
47
+
48
+ def as_json
49
+ return {
50
+ id: @id,
51
+ title: @title,
52
+ site_name: @site_name,
53
+ site_name_cn: @site_name_cn,
54
+ abstract: @abstract,
55
+ content: @content,
56
+ author_names: @author_names,
57
+ source: @source,
58
+ files: @files,
59
+ images: @images,
60
+ videos: @videos,
61
+ audios: @audios,
62
+ links: @links,
63
+ domain: @domain,
64
+ keywords: @keywords,
65
+ html_content: @html_content,
66
+ lang: @lang,
67
+ country_cn: @country_cn,
68
+ country_code: @country_code,
69
+ created_at: @created_at,
70
+ updated_at: @updated_at,
71
+ created_time: @created_time,
72
+ oss_files: @oss_files,
73
+ oss_images: @oss_images,
74
+ customer_category: @customer_category,
75
+ category: @category,
76
+ topics: @topics,
77
+ tags: @tags,
78
+ views: @views,
79
+ comments: @comments,
80
+ reference: @reference,
81
+ mention_country: @mention_country,
82
+ authors: @authors,
83
+ sub_title: @sub_title,
84
+ timezone: @timezone,
85
+ timezone_location: @timezone_location,
86
+ }
87
+ end
88
+
89
+ def to_json
90
+ return as_json.to_json
91
+ end
92
+
93
+ def self.verify_keys
94
+ return {
95
+ "id"=> ["empty", "string"],
96
+ "title"=> ["empty", "string"],
97
+ "site_name"=> ["empty", "string"],
98
+ "site_name_cn"=> ["empty", "string"],
99
+ "content"=> ["empty", "string"],
100
+ "source"=> ["empty", "string"],
101
+ "files"=> ["json", "string"],
102
+ "images"=> ["json", "string"],
103
+ "videos"=> ["json", "string"],
104
+ "audios"=> ["json", "string"],
105
+ "links"=> ["json", "string"],
106
+ "domain"=> ["empty", "string"],
107
+ "keywords"=> ["json", "string"],
108
+ "lang"=> ["empty", "string"],
109
+ "country_cn"=> ["empty", "string"],
110
+ "country_code"=> ["empty", "string"],
111
+ "created_at"=> ["empty", "int", "length:13"],
112
+ "updated_at"=> ["empty", "int", "length:13"],
113
+ "created_time"=> ["empty", "int", "length:10"],
114
+ "oss_files"=> ["json", "string"],
115
+ "oss_images"=> ["json", "string"],
116
+ "topics"=> ["json", "string"],
117
+ "tags"=> ["string", "json"],
118
+ "authors"=> ["json", "fields:author_id,author_name,author_url", "string"],
119
+ "timezone"=> ["empty", 'regex:[\+|-]\d{4}', "string"],
120
+ "timezone_location"=> ["empty", "string"],
121
+ }
122
+ end
123
+
124
+ end
@@ -0,0 +1,126 @@
1
+ class ThinktankInformation < BaseModel
2
+ attr_accessor :id
3
+ attr_accessor :title
4
+ attr_accessor :site_name
5
+ attr_accessor :site_name_cn
6
+ attr_accessor :abstract
7
+ attr_accessor :content
8
+ attr_accessor :author_names
9
+ attr_accessor :state_info
10
+ attr_accessor :source
11
+ attr_accessor :files
12
+ attr_accessor :images
13
+ attr_accessor :videos
14
+ attr_accessor :audios
15
+ attr_accessor :links
16
+ attr_accessor :domain
17
+ attr_accessor :keywords
18
+ attr_accessor :html_content
19
+ attr_accessor :lang
20
+ attr_accessor :country_cn
21
+ attr_accessor :country_code
22
+ attr_accessor :created_at
23
+ attr_accessor :updated_at
24
+ attr_accessor :created_time
25
+ attr_accessor :oss_files
26
+ attr_accessor :oss_images
27
+ attr_accessor :customer_category
28
+ attr_accessor :category
29
+ attr_accessor :topics
30
+ attr_accessor :tags
31
+ attr_accessor :views
32
+ attr_accessor :comments
33
+ attr_accessor :reference
34
+ attr_accessor :mention_country
35
+ attr_accessor :authors
36
+ attr_accessor :sub_title
37
+ attr_accessor :timezone
38
+ attr_accessor :timezone_location
39
+
40
+ def self.table_name
41
+ return "thinktank_informations"
42
+ end
43
+ register
44
+
45
+
46
+ def initialize(options={})
47
+
48
+ end
49
+
50
+ def as_json
51
+ return {
52
+ id: @id,
53
+ title: @title,
54
+ site_name: @site_name,
55
+ site_name_cn: @site_name_cn,
56
+ abstract: @abstract,
57
+ content: @content,
58
+ author_names: @author_names,
59
+ state_info: @state_info,
60
+ source: @source,
61
+ files: @files,
62
+ images: @images,
63
+ videos: @videos,
64
+ links: @links,
65
+ domain: @domain,
66
+ keywords: @keywords,
67
+ html_content: @html_content,
68
+ lang: @lang,
69
+ country_cn: @country_cn,
70
+ country_code: @country_code,
71
+ created_at: @created_at,
72
+ updated_at: @updated_at,
73
+ created_time: @created_time,
74
+ oss_files: @oss_files,
75
+ oss_images: @oss_images,
76
+ customer_category: @customer_category,
77
+ category: @category,
78
+ topics: @topics,
79
+ tags: @tags,
80
+ views: @views,
81
+ comments: @comments,
82
+ reference: @reference,
83
+ mention_country: @mention_country,
84
+ authors: @authors,
85
+ sub_title: @sub_title,
86
+ timezone: @timezone,
87
+ timezone_location: @timezone_location
88
+
89
+ }
90
+ end
91
+
92
+ def to_json
93
+ return as_json.to_json
94
+ end
95
+
96
+ def self.verify_keys
97
+ return {
98
+ "id"=> ["empty", "string"],
99
+ "title"=> ["empty", "string"],
100
+ "site_name"=> ["empty", "string"],
101
+ "site_name_cn"=> ["empty", "string"],
102
+ "content"=> ["empty", "string"],
103
+ "source"=> ["empty", "string"],
104
+ "files"=> ["json", "string"],
105
+ "images"=> ["json", "string"],
106
+ "videos"=> ["json", "string"],
107
+ "audios"=> ["json", "string"],
108
+ "links"=> ["json", "string"],
109
+ "domain"=> ["empty", "string"],
110
+ "keywords"=> ["json", "string"],
111
+ "lang"=> ["empty", "string"],
112
+ "country_cn"=> ["empty", "string"],
113
+ "country_code"=> ["empty", "string"],
114
+ "created_at"=> ["empty", "int", "length:13"],
115
+ "updated_at"=> ["empty", "int", "length:13"],
116
+ "created_time"=> ["empty", "int", "length:10"],
117
+ "oss_files"=> ["json", "string"],
118
+ "oss_images"=> ["json", "string"],
119
+ "topics"=> ["json", "string"],
120
+ "tags"=> ["string", "json"],
121
+ "authors"=> ["json", "fields:author_id,author_name,author_url", "string"],
122
+ "timezone"=> ["empty", 'regex:[\+|-]\d{4}', "string"],
123
+ "timezone_location"=> ["empty", "string"],
124
+ }
125
+ end
126
+ end
@@ -0,0 +1,125 @@
1
+ class ThinktankReport < BaseModel
2
+ attr_accessor :id
3
+ attr_accessor :title
4
+ attr_accessor :site_name
5
+ attr_accessor :site_name_cn
6
+ attr_accessor :abstract
7
+ attr_accessor :content
8
+ attr_accessor :author_names
9
+ attr_accessor :source
10
+ attr_accessor :files
11
+ attr_accessor :images
12
+ attr_accessor :videos
13
+ attr_accessor :audios
14
+ attr_accessor :links
15
+ attr_accessor :domain
16
+ attr_accessor :keywords
17
+ attr_accessor :html_content
18
+ attr_accessor :lang
19
+ attr_accessor :country_cn
20
+ attr_accessor :country_code
21
+ attr_accessor :created_at
22
+ attr_accessor :updated_at
23
+ attr_accessor :created_time
24
+ attr_accessor :oss_files
25
+ attr_accessor :oss_images
26
+ attr_accessor :customer_category
27
+ attr_accessor :category
28
+ attr_accessor :topics
29
+ attr_accessor :tags
30
+ attr_accessor :views
31
+ attr_accessor :comments
32
+ attr_accessor :reference
33
+ attr_accessor :mention_country
34
+ attr_accessor :authors
35
+ attr_accessor :sub_title
36
+ attr_accessor :timezone
37
+ attr_accessor :timezone_location
38
+
39
+ def self.table_name
40
+ return "thinktank_reports"
41
+ end
42
+ register
43
+
44
+
45
+ def initialize(options = {})
46
+
47
+ end
48
+
49
+ def to_json
50
+ return as_json.to_json
51
+ end
52
+
53
+ def as_json
54
+ return {
55
+ id: @id,
56
+ title: @title,
57
+ site_name: @site_name,
58
+ site_name_cn: @site_name_cn,
59
+ abstract: @abstract,
60
+ content: @content,
61
+ author_names: @author_names,
62
+ source: @source,
63
+ files: @files,
64
+ images: @images,
65
+ videos: @videos,
66
+ audios: @audios,
67
+ links: @links,
68
+ domain: @domain,
69
+ keywords: @keywords,
70
+ html_content: @html_content,
71
+ lang: @lang,
72
+ country_cn: @country_cn,
73
+ country_code: @country_code,
74
+ created_at: @created_at,
75
+ updated_at: @updated_at,
76
+ created_time: @created_time,
77
+ oss_files: @oss_files,
78
+ oss_images: @oss_images,
79
+ customer_category: @customer_category,
80
+ category: @category,
81
+ topics: @topics,
82
+ tags: @tags,
83
+ views: @views,
84
+ comments: @comments,
85
+ reference: @reference,
86
+ mention_country: @mention_country,
87
+ authors: @authors,
88
+ sub_title: @sub_title,
89
+ timezone: @timezone,
90
+ timezone_location: @timezone_location
91
+ }
92
+ end
93
+
94
+ def self.verify_keys
95
+ return {
96
+ "id"=> ["empty", "string"],
97
+ "title"=> ["empty", "string"],
98
+ "site_name"=> ["empty", "string"],
99
+ "site_name_cn"=> ["empty", "string"],
100
+ "content"=> ["empty", "string"],
101
+ "source"=> ["empty", "string"],
102
+ "files"=> ["json", "string"],
103
+ "images"=> ["json", "string"],
104
+ "videos"=> ["json", "string"],
105
+ "audios"=> ["json", "string"],
106
+ "links"=> ["json", "string"],
107
+ "domain"=> ["empty", "string"],
108
+ "keywords"=> ["json", "string"],
109
+ "lang"=> ["empty", "string"],
110
+ "country_cn"=> ["empty", "string"],
111
+ "country_code"=> ["empty", "string"],
112
+ "created_at"=> ["empty", "int", "length:13"],
113
+ "updated_at"=> ["empty", "int", "length:13"],
114
+ "created_time"=> ["empty", "int", "length:10"],
115
+ "oss_files"=> ["json", "string"],
116
+ "oss_images"=> ["json", "string"],
117
+ "topics"=> ["json", "string"],
118
+ "tags"=> ["string", "json"],
119
+ "authors"=> ["json", "fields:author_id,author_name,author_url", "string"],
120
+ "timezone"=> ["empty", 'regex:[\+|-]\d{4}', "string"],
121
+ "timezone_location"=> ["empty", "string"],
122
+ }
123
+ end
124
+
125
+ end
@@ -0,0 +1,53 @@
1
+ class Verify
2
+ def self.IsVerified?(items=[],table_name="")
3
+
4
+ if table_name == "" && items.size > 0
5
+ table_name = switch_table(items[0])
6
+ end
7
+
8
+ if table_name == ""
9
+ puts "未匹配,不验证"
10
+ else
11
+ puts "验证:#{table_name}"
12
+ end
13
+
14
+ if table_name != ""
15
+ kclass = Object.const_get $map_models[table_name]
16
+
17
+ if kclass == nil
18
+ puts "ERROR #{table_name} 验证 not Exist!"
19
+ return true
20
+ end
21
+
22
+ items.each do |item|
23
+ if !kclass.new(item).verify
24
+ return false
25
+ end
26
+ end
27
+ end
28
+
29
+
30
+ return true
31
+ end
32
+
33
+ def self.switch_table(item={})
34
+ table_name = ""
35
+
36
+ item_keys = JSON.parse(item.to_json).keys
37
+ # puts "----#{item_keys}--"
38
+
39
+ $map_models.each do |k,v|
40
+ klass = Object.const_get v
41
+ klass_keys = klass.fields
42
+
43
+ # puts "----#{klass_keys}--"
44
+ sub_keys = item_keys - klass_keys
45
+ if sub_keys.size == 0 && item_keys.size == klass_keys.size
46
+ return k
47
+ end
48
+ end
49
+
50
+ return table_name
51
+ end
52
+
53
+ end
data/ruby_sdk_test.rb CHANGED
@@ -10,6 +10,11 @@ def main
10
10
 
11
11
  CrawlabRubySdk.save_item(a)
12
12
 
13
+ # CrawlabRubySdk.save_item(a,"thinktank_experts")
14
+ # CrawlabRubySdk.save_item(a,"thinktank_expert_reports")
15
+ # CrawlabRubySdk.save_item(a,"thinktank_informations")
16
+ # CrawlabRubySdk.save_item(a,"thinktank_reports")
17
+
13
18
  item = {
14
19
  "id": "dec9d5415409cc9275f5590c145c3ccf",
15
20
  "title": "Association of Selected State Policies and Requirements for Buprenorphine Treatment With Per Capita Months of Treatment",
@@ -50,7 +55,6 @@ def main
50
55
  }
51
56
  CrawlabRubySdk.save_item(item)
52
57
 
53
-
54
58
  end
55
59
 
56
- main
60
+ # main
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawlab_ruby_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - min
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-17 00:00:00.000000000 Z
11
+ date: 2023-07-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: grpc
@@ -90,12 +90,14 @@ files:
90
90
  - lib/entity/request_pb.rb
91
91
  - lib/entity/response_code_pb.rb
92
92
  - lib/entity/response_pb.rb
93
- - lib/entity/result_pb.rb
94
93
  - lib/entity/stream_message_code_pb.rb
95
- - lib/entity/stream_message_data_task_pb.rb
96
94
  - lib/entity/stream_message_pb.rb
97
- - lib/models/node_pb.rb
98
- - lib/models/task_pb.rb
95
+ - lib/models/base.rb
96
+ - lib/models/thinktank_expert.rb
97
+ - lib/models/thinktank_expert_report.rb
98
+ - lib/models/thinktank_information.rb
99
+ - lib/models/thinktank_report.rb
100
+ - lib/verify/verify.rb
99
101
  - ruby_sdk_test.rb
100
102
  homepage: https://github.com/rich-bro/crawlab_ruby_sdk
101
103
  licenses: []
@@ -1,15 +0,0 @@
1
- require 'google/protobuf'
2
-
3
- Google::Protobuf::DescriptorPool.generated_pool.build do
4
- add_file("entity/result.proto", :syntax => :proto3) do
5
- add_message "grpc.Result" do
6
- optional :_tid, :string, 1
7
- optional :name, :string, 2
8
- optional :age, :string, 3
9
- end
10
- end
11
- end
12
-
13
- module Grpc
14
- Result = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Result").msgclass
15
- end
@@ -1,58 +0,0 @@
1
- # # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # # source: entity/stream_message_data_task.proto
3
-
4
- # require 'google/protobuf'
5
-
6
- # Google::Protobuf::DescriptorPool.generated_pool.build do
7
- # add_file(PWD + "/proto/entity/stream_message_data_task.proto", :syntax => :proto3) do
8
- # add_message "grpc.StreamMessageDataTask" do
9
- # optional :task_id, :string, 1
10
- # optional :data, :string,:repeated, 2
11
- # end
12
- # end
13
- # end
14
-
15
- # module Grpc
16
- # StreamMessageDataTask = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.StreamMessageDataTask").msgclass
17
- # end
18
-
19
-
20
- # frozen_string_literal: true
21
- # Generated by the protocol buffer compiler. DO NOT EDIT!
22
- # source: entity/stream_message_data_task.proto
23
-
24
- require 'google/protobuf'
25
-
26
- # require 'entity/result_pb'
27
-
28
-
29
- descriptor_data = "\n%entity/stream_message_data_task.proto\x12\x04grpc\x1a\x13\x65ntity/result.proto\"D\n\x15StreamMessageDataTask\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x1a\n\x04\x64\x61ta\x18\x02 \x03(\x0b\x32\x0c.grpc.ResultB\x08Z\x06.;grpcb\x06proto3"
30
-
31
- pool = Google::Protobuf::DescriptorPool.generated_pool
32
-
33
- begin
34
- pool.add_serialized_file(descriptor_data)
35
- rescue TypeError => e
36
- # Compatibility code: will be removed in the next major version.
37
- require 'google/protobuf/descriptor_pb'
38
- parsed = Google::Protobuf::FileDescriptorProto.decode(descriptor_data)
39
- parsed.clear_dependency
40
- serialized = parsed.class.encode(parsed)
41
- file = pool.add_serialized_file(serialized)
42
- warn "Warning: Protobuf detected an import path issue while loading generated file #{__FILE__}"
43
- imports = [
44
- ["grpc.Result", "entity/result.proto"],
45
- ]
46
- imports.each do |type_name, expected_filename|
47
- import_file = pool.lookup(type_name).file_descriptor
48
- if import_file.name != expected_filename
49
- warn "- #{file.name} imports #{expected_filename}, but that import was loaded as #{import_file.name}"
50
- end
51
- end
52
- warn "Each proto file must use a consistent fully-qualified name."
53
- warn "This will become an error in the next major version."
54
- end
55
-
56
- module Grpc
57
- StreamMessageDataTask = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.StreamMessageDataTask").msgclass
58
- end
@@ -1,27 +0,0 @@
1
- # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # source: models/node.proto
3
-
4
- require 'google/protobuf'
5
-
6
- Google::Protobuf::DescriptorPool.generated_pool.build do
7
- add_file("proto/models/node.proto", :syntax => :proto3) do
8
- add_message "grpc.Node" do
9
- optional :_id, :string, 1
10
- optional :name, :string, 2
11
- optional :ip, :string, 3
12
- optional :port, :string, 5
13
- optional :mac, :string, 6
14
- optional :hostname, :string, 7
15
- optional :description, :string, 8
16
- optional :key, :string, 9
17
- optional :is_master, :bool, 11
18
- optional :update_ts, :string, 12
19
- optional :create_ts, :string, 13
20
- optional :update_ts_unix, :int64, 14
21
- end
22
- end
23
- end
24
-
25
- module Grpc
26
- Node = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Node").msgclass
27
- end
@@ -1,26 +0,0 @@
1
- # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # source: models/task.proto
3
-
4
- require 'google/protobuf'
5
-
6
- Google::Protobuf::DescriptorPool.generated_pool.build do
7
- add_file("models/task.proto", :syntax => :proto3) do
8
- add_message "grpc.Task" do
9
- optional :_id, :string, 1
10
- optional :spider_id, :string, 2
11
- optional :status, :string, 5
12
- optional :node_id, :string, 6
13
- optional :cmd, :string, 8
14
- optional :param, :string, 9
15
- optional :error, :string, 10
16
- optional :pid, :int32, 16
17
- optional :run_type, :string, 17
18
- optional :schedule_id, :string, 18
19
- optional :type, :string, 19
20
- end
21
- end
22
- end
23
-
24
- module Grpc
25
- Task = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Task").msgclass
26
- end