crawlab_ruby_sdk 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3fc9c6d1277660d63c17811f472dd56d438a4998c6d0c03e99d1a9e5c0357868
4
- data.tar.gz: 20b8ffa1ba0b1275c66f1a79cc80a479d39211b781174107a4cc21bf8ca86fb1
3
+ metadata.gz: 1c85282caf96edf6a9100a70b045bb1004a471db9d2a7842f83e77a28fbde32e
4
+ data.tar.gz: cc64ce0004eeede08da51a5a7eec294f0db1b494d897079562a1578128960a1e
5
5
  SHA512:
6
- metadata.gz: 5b925b182c7fa7fcbd8d7c9d3a2883e8acee1fa41eafc01a28aaa1ba78c037f8d17f1fed831846001b1761b1480854e908732ac5e8e3b500828267baebc1721c
7
- data.tar.gz: ae63a9d061c104cf8ad8c3e9c4d72b1a70f9f21dbe4c7b07d299ee5cfafe048a008f16ee26ed94efda8b64542aed817f33c2dbdd0323cab28d0c2ce1d8cd67ea
6
+ metadata.gz: 012211e75c7de44e0493c931ce3d6d4cd8f1351da52ae4bd77da3aca95e39294d3c4c9e2c5cf2d5ebd877dcb1dbd2e45bc9f9b8ce2199ffdf3d71c3f6de517a4
7
+ data.tar.gz: 380d04dddd44c608e80eebc23f601f23acaacc08fe291e6a711749e65b00d7aef664a8d30a0a0537bfec82c4e02620da3dfdbcf259bd1bd6aecc2bebaaf821d2
@@ -1,3 +1,3 @@
1
1
  module CrawlabRubySdk
2
- VERSION = "0.1.6"
2
+ VERSION = "0.1.7"
3
3
  end
@@ -1,4 +1,4 @@
1
- require "crawlab_ruby_sdk/version"
1
+ # require "crawlab_ruby_sdk/version"
2
2
  require "grpc"
3
3
  require "json"
4
4
  def traverse_dir(file_path)
@@ -14,17 +14,18 @@ dir = File.expand_path("..", __FILE__)
14
14
  # puts dir
15
15
 
16
16
  traverse_dir(dir+'/entity/stream_message_code_pb.rb')
17
- traverse_dir(dir+'/entity/result_pb.rb')
18
17
  traverse_dir(dir+'/entity/stream_message_pb.rb')
19
- traverse_dir(dir+'/entity/stream_message_data_task_pb.rb')
20
18
  traverse_dir(dir+'/client')
19
+ traverse_dir(dir+'/models/base.rb')
20
+ traverse_dir(dir+'/models')
21
+ traverse_dir(dir+'/verify')
21
22
 
22
23
  module CrawlabRubySdk
23
24
  class Error < StandardError; end
24
25
  # Your code goes here...
25
26
 
26
27
 
27
- def self.save_item(item={})
28
+ def self.save_item(item={},table_name="")
28
29
  address = ENV["CRAWLAB_GRPC_ADDRESS"]
29
30
  if address==nil || address == ""
30
31
  address = "localhost:9666"
@@ -34,6 +35,10 @@ module CrawlabRubySdk
34
35
  if auth==nil || auth == ""
35
36
  auth = "Crawlab2021!"
36
37
  end
38
+
39
+ if !Verify.IsVerified?([item],table_name)
40
+ return
41
+ end
37
42
 
38
43
  client = TaskServiceClient.new(address,auth)
39
44
 
@@ -42,7 +47,7 @@ module CrawlabRubySdk
42
47
  save(sub_client,[item])
43
48
  end
44
49
 
45
- def self.save_items(items=[])
50
+ def self.save_items(items=[],table_name="")
46
51
  address = ENV["CRAWLAB_GRPC_ADDRESS"]
47
52
  if address==nil || address == ""
48
53
  address = "localhost:9666"
@@ -52,6 +57,9 @@ module CrawlabRubySdk
52
57
  if auth==nil || auth == ""
53
58
  auth = "Crawlab2021!"
54
59
  end
60
+ if !Verify.IsVerified?([item],table_name)
61
+ return
62
+ end
55
63
 
56
64
  client = TaskServiceClient.new(address,auth)
57
65
 
@@ -90,15 +98,11 @@ module CrawlabRubySdk
90
98
  end
91
99
 
92
100
  data = {task_id: task_id,data:records}.to_json.b
93
- # data = data.encode("utf-8")
94
- # puts data
95
-
96
- # data = data.encode('ASCII-8BIT', invalid: :replace, undef: :replace, replace: '')
97
- # puts data
98
101
 
99
102
  msg = Grpc::StreamMessage.new(code:3,data:data)
103
+ puts data
100
104
 
101
- sub_client.Send([msg])
105
+ # sub_client.Send([msg])
102
106
  end
103
107
 
104
108
  def self.get_task_id
@@ -0,0 +1,188 @@
1
+ require 'json'
2
+
3
+ $map_models = {}
4
# Base class for table-backed record models. It provides the registry hook,
# field discovery and the rule-driven validation that runs before an item is
# saved.
#
# Subclasses override +table_name+, +verify_keys+ and +as_json+, and call
# +register+ in their class body.
class BaseModel
  # Builds a record from a hash of attributes.
  #
  # Every entry whose key (String or Symbol) has a matching public writer is
  # assigned; all other entries are ignored.
  #
  # @param options [Hash, nil] attribute name => value
  def initialize(options = {})
    (options || {}).each do |key, value|
      writer = "#{key}="
      public_send(writer, value) if respond_to?(writer)
    end
  end

  # @return [String] name of the table this model describes ("" on the base)
  def self.table_name
    ""
  end

  # Records this class name in the global table-name => class-name registry.
  def self.register
    $map_models[table_name] = name
  end

  # @return [Array<String>] the keys a blank instance serializes, as strings
  def self.fields
    new({}).as_json.keys.map(&:to_s)
  end

  # @return [Hash{String=>Array<String>}] field name => validation rules
  def self.verify_keys
    {}
  end

  # @return [Hash] the record as a hash keyed by symbols
  def as_json
    {}
  end

  # Runs every rule from +verify_keys+ against the values in +as_json+.
  #
  # Stops at the first violated rule, prints its message and returns false.
  # Rule lists that are not arrays are skipped.
  #
  # @return [Boolean] true when all rules pass
  def verify
    record = as_json

    self.class.verify_keys.each do |key, rules|
      next unless rules.is_a?(Array)

      value = record[key.to_sym]
      rules.each do |rule|
        message = rule_violation(key, rule, value)
        next unless message

        puts message
        return false
      end
    end

    true
  end

  # @return [Boolean] false for nil or the empty string, true otherwise
  def verify_empty(v)
    !(v.nil? || v == "")
  end

  # @return [Boolean] true when +v+ parses as JSON (nil and non-strings fail)
  def verify_json(v)
    JSON.parse(v)
    true
  rescue StandardError
    false
  end

  # @return [Boolean] true when +v+ is a String
  def verify_string(v)
    v.is_a?(String)
  end

  # @return [Boolean] true when +v+ is an Integer
  def verify_int(v)
    v.is_a?(Integer)
  end

  # Checks +v+ against the pattern of a "regex:<pattern>" rule.
  #
  # @return [Boolean] false for a malformed rule, a blank or non-String value,
  #   or when the pattern does not match
  def verify_regex(fn, v)
    pattern = rule_argument(fn)
    return false if pattern.nil? || pattern.empty?
    return false if v.nil? || v == ""
    # Only strings are matched: `=~` on other objects raises on Ruby 3.2+.
    return false unless v.is_a?(String)

    Regexp.new(pattern).match?(v)
  end

  # Checks that the string form of +v+ has exactly the size named by a
  # "length:<n>" rule.
  #
  # @return [Boolean] false for a malformed or zero length, or a nil value
  def verify_length(fn, v)
    length = rule_argument(fn).to_i
    return false if length.zero? || v.nil?

    v.to_s.size == length
  end

  # Checks that every element of the JSON array in +v+ carries all keys
  # named by a "fields:<a>,<b>,..." rule.
  #
  # @return [Boolean] false for a malformed rule, unparsable JSON, or any
  #   element missing one of the keys
  def verify_fields(fn, v)
    spec = rule_argument(fn)
    return false if spec.nil? || spec.empty?

    required = spec.split(",")
    JSON.parse(v).each do |entry|
      return false if required.any? { |field| entry[field].nil? }
    end
    true
  rescue StandardError
    # Unparsable input or elements that are not hash-like count as failure.
    false
  end

  private

  # Returns the error message for a violated rule, or nil when the rule
  # passes, is unknown, or is not a String.
  def rule_violation(key, rule, value)
    return nil unless rule.is_a?(String)

    if rule == "empty"
      "ERROR: #{key} cannot be empty!" unless verify_empty(value)
    elsif rule == "json"
      "ERROR: #{key} json string parse fail!" unless verify_json(value)
    elsif rule == "string"
      "ERROR: #{key} field type is not string!" unless verify_string(value)
    elsif rule == "int"
      "ERROR: #{key} field type is not int!" unless verify_int(value)
    elsif rule.include?("regex")
      "ERROR: #{key} regex #{rule} match error" unless verify_regex(rule, value)
    elsif rule.include?("length")
      "ERROR: #{key} length must be #{rule}" unless verify_length(rule, value)
    elsif rule.include?("fields")
      "ERROR: #{key}:#{rule} not Exist!" unless verify_fields(rule, value)
    end
  end

  # Returns the text after the first ":" of a rule, or nil when the rule is
  # not a String or has no ":". Splitting once keeps colons inside the
  # argument (for example in a regex) intact.
  def rule_argument(fn)
    return nil unless fn.is_a?(String)

    fn.split(":", 2)[1]
  end
end
@@ -0,0 +1,107 @@
1
# Record model for the "thinktank_experts" table.
class ThinktankExpert < BaseModel
  # Attribute names, in the order #as_json serializes them.
  FIELDS = %i[
    id name title content location area_of_expertise profile_images
    phone email link audios videos education related_topics
    site_name site_name_cn domain created_at updated_at source
    oss_profile_images facebook twitter linkedin instagram wikidata
    person_type files oss_files associated_program lang website
    nationalities
  ].freeze

  attr_accessor(*FIELDS)

  # @return [String] the table this model is registered under
  def self.table_name
    "thinktank_experts"
  end
  # Must run after table_name is defined: register reads it immediately.
  register

  # Builds a record from an item hash. Entries whose key (String or Symbol)
  # names one of the attributes are assigned; other keys are ignored.
  #
  # @param options [Hash, nil] attribute name => value
  def initialize(options = {})
    (options || {}).each do |key, value|
      writer = "#{key}="
      public_send(writer, value) if respond_to?(writer)
    end
  end

  # @return [Hash{Symbol=>Object}] all attributes, unset ones as nil
  def as_json
    FIELDS.each_with_object({}) do |field, json|
      json[field] = instance_variable_get("@#{field}")
    end
  end

  # Accepts the optional generator arguments the JSON library passes when
  # this object is nested inside another structure.
  #
  # @return [String] JSON text of #as_json
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{String=>Array<String>}] field name => validation rules
  def self.verify_keys
    {
      "id" => ["empty", "string"],
      "title" => ["json", "string"],
      "name" => ["empty", "string"],
      "site_name_cn" => ["empty", "string"],
      "site_name" => ["empty", "string"],
      "source" => ["empty", "string"],
      "audios" => ["json", "string"],
      "videos" => ["json", "string"],
      "related_topics" => ["json", "string"],
      "files" => ["json", "string"],
      "oss_files" => ["json", "string"],
      "domain" => ["empty", "string"],
      "created_at" => ["empty", "int", "length:13"],
      "updated_at" => ["empty", "int", "length:13"],
    }
  end
end
@@ -0,0 +1,124 @@
1
# Record model for the "thinktank_expert_reports" table.
class ThinktankExpertReport < BaseModel
  # Attribute names, in the order #as_json serializes them.
  FIELDS = %i[
    id title site_name site_name_cn abstract content author_names source
    files images videos audios links domain keywords html_content lang
    country_cn country_code created_at updated_at created_time
    oss_files oss_images customer_category category topics tags views
    comments reference mention_country authors sub_title timezone
    timezone_location
  ].freeze

  attr_accessor(*FIELDS)

  # @return [String] the table this model is registered under
  def self.table_name
    "thinktank_expert_reports"
  end
  # Must run after table_name is defined: register reads it immediately.
  register

  # Builds a record from an item hash. Entries whose key (String or Symbol)
  # names one of the attributes are assigned; other keys are ignored.
  #
  # @param options [Hash, nil] attribute name => value
  def initialize(options = {})
    (options || {}).each do |key, value|
      writer = "#{key}="
      public_send(writer, value) if respond_to?(writer)
    end
  end

  # @return [Hash{Symbol=>Object}] all attributes, unset ones as nil
  def as_json
    FIELDS.each_with_object({}) do |field, json|
      json[field] = instance_variable_get("@#{field}")
    end
  end

  # Accepts the optional generator arguments the JSON library passes when
  # this object is nested inside another structure.
  #
  # @return [String] JSON text of #as_json
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{String=>Array<String>}] field name => validation rules
  def self.verify_keys
    {
      "id" => ["empty", "string"],
      "title" => ["empty", "string"],
      "site_name" => ["empty", "string"],
      "site_name_cn" => ["empty", "string"],
      "content" => ["empty", "string"],
      "source" => ["empty", "string"],
      "files" => ["json", "string"],
      "images" => ["json", "string"],
      "videos" => ["json", "string"],
      "audios" => ["json", "string"],
      "links" => ["json", "string"],
      "domain" => ["empty", "string"],
      "keywords" => ["json", "string"],
      "lang" => ["empty", "string"],
      "country_cn" => ["empty", "string"],
      "country_code" => ["empty", "string"],
      "created_at" => ["empty", "int", "length:13"],
      "updated_at" => ["empty", "int", "length:13"],
      "created_time" => ["empty", "int", "length:10"],
      "oss_files" => ["json", "string"],
      "oss_images" => ["json", "string"],
      "topics" => ["json", "string"],
      "tags" => ["string", "json"],
      "authors" => ["json", "fields:author_id,author_name,author_url", "string"],
      "timezone" => ["empty", 'regex:[\+|-]\d{4}', "string"],
      "timezone_location" => ["empty", "string"],
    }
  end
end
@@ -0,0 +1,126 @@
1
# Record model for the "thinktank_informations" table.
class ThinktankInformation < BaseModel
  # Attribute names, in the order #as_json serializes them. `audios` is
  # included so the serialized record matches the accessors and the "audios"
  # validation rule.
  FIELDS = %i[
    id title site_name site_name_cn abstract content author_names
    state_info source files images videos audios links domain keywords
    html_content lang country_cn country_code created_at updated_at
    created_time oss_files oss_images customer_category category topics
    tags views comments reference mention_country authors sub_title
    timezone timezone_location
  ].freeze

  attr_accessor(*FIELDS)

  # @return [String] the table this model is registered under
  def self.table_name
    "thinktank_informations"
  end
  # Must run after table_name is defined: register reads it immediately.
  register

  # Builds a record from an item hash. Entries whose key (String or Symbol)
  # names one of the attributes are assigned; other keys are ignored.
  #
  # @param options [Hash, nil] attribute name => value
  def initialize(options = {})
    (options || {}).each do |key, value|
      writer = "#{key}="
      public_send(writer, value) if respond_to?(writer)
    end
  end

  # @return [Hash{Symbol=>Object}] all attributes, unset ones as nil
  def as_json
    FIELDS.each_with_object({}) do |field, json|
      json[field] = instance_variable_get("@#{field}")
    end
  end

  # Accepts the optional generator arguments the JSON library passes when
  # this object is nested inside another structure.
  #
  # @return [String] JSON text of #as_json
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{String=>Array<String>}] field name => validation rules
  def self.verify_keys
    {
      "id" => ["empty", "string"],
      "title" => ["empty", "string"],
      "site_name" => ["empty", "string"],
      "site_name_cn" => ["empty", "string"],
      "content" => ["empty", "string"],
      "source" => ["empty", "string"],
      "files" => ["json", "string"],
      "images" => ["json", "string"],
      "videos" => ["json", "string"],
      "audios" => ["json", "string"],
      "links" => ["json", "string"],
      "domain" => ["empty", "string"],
      "keywords" => ["json", "string"],
      "lang" => ["empty", "string"],
      "country_cn" => ["empty", "string"],
      "country_code" => ["empty", "string"],
      "created_at" => ["empty", "int", "length:13"],
      "updated_at" => ["empty", "int", "length:13"],
      "created_time" => ["empty", "int", "length:10"],
      "oss_files" => ["json", "string"],
      "oss_images" => ["json", "string"],
      "topics" => ["json", "string"],
      "tags" => ["string", "json"],
      "authors" => ["json", "fields:author_id,author_name,author_url", "string"],
      "timezone" => ["empty", 'regex:[\+|-]\d{4}', "string"],
      "timezone_location" => ["empty", "string"],
    }
  end
end
@@ -0,0 +1,125 @@
1
# Record model for the "thinktank_reports" table.
class ThinktankReport < BaseModel
  # Attribute names, in the order #as_json serializes them.
  FIELDS = %i[
    id title site_name site_name_cn abstract content author_names source
    files images videos audios links domain keywords html_content lang
    country_cn country_code created_at updated_at created_time
    oss_files oss_images customer_category category topics tags views
    comments reference mention_country authors sub_title timezone
    timezone_location
  ].freeze

  attr_accessor(*FIELDS)

  # @return [String] the table this model is registered under
  def self.table_name
    "thinktank_reports"
  end
  # Must run after table_name is defined: register reads it immediately.
  register

  # Builds a record from an item hash. Entries whose key (String or Symbol)
  # names one of the attributes are assigned; other keys are ignored.
  #
  # @param options [Hash, nil] attribute name => value
  def initialize(options = {})
    (options || {}).each do |key, value|
      writer = "#{key}="
      public_send(writer, value) if respond_to?(writer)
    end
  end

  # Accepts the optional generator arguments the JSON library passes when
  # this object is nested inside another structure.
  #
  # @return [String] JSON text of #as_json
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{Symbol=>Object}] all attributes, unset ones as nil
  def as_json
    FIELDS.each_with_object({}) do |field, json|
      json[field] = instance_variable_get("@#{field}")
    end
  end

  # @return [Hash{String=>Array<String>}] field name => validation rules
  def self.verify_keys
    {
      "id" => ["empty", "string"],
      "title" => ["empty", "string"],
      "site_name" => ["empty", "string"],
      "site_name_cn" => ["empty", "string"],
      "content" => ["empty", "string"],
      "source" => ["empty", "string"],
      "files" => ["json", "string"],
      "images" => ["json", "string"],
      "videos" => ["json", "string"],
      "audios" => ["json", "string"],
      "links" => ["json", "string"],
      "domain" => ["empty", "string"],
      "keywords" => ["json", "string"],
      "lang" => ["empty", "string"],
      "country_cn" => ["empty", "string"],
      "country_code" => ["empty", "string"],
      "created_at" => ["empty", "int", "length:13"],
      "updated_at" => ["empty", "int", "length:13"],
      "created_time" => ["empty", "int", "length:10"],
      "oss_files" => ["json", "string"],
      "oss_images" => ["json", "string"],
      "topics" => ["json", "string"],
      "tags" => ["string", "json"],
      "authors" => ["json", "fields:author_id,author_name,author_url", "string"],
      "timezone" => ["empty", 'regex:[\+|-]\d{4}', "string"],
      "timezone_location" => ["empty", "string"],
    }
  end
end
@@ -0,0 +1,53 @@
1
# Pre-save validation entry point. It resolves the model class registered
# for a table in the global $map_models registry and runs that model's
# #verify over every item.
class Verify
  # Validates +items+ against the model registered for +table_name+.
  #
  # When +table_name+ is blank, the table is inferred from the keys of the
  # first item. Items are accepted without validation when no table can be
  # determined or when the table has no registered model.
  #
  # @param items [Array<Hash>] items about to be saved
  # @param table_name [String] registered table name, or "" to infer it
  # @return [Boolean] false as soon as one item fails validation, else true
  def self.IsVerified?(items=[],table_name="")
    table_name = switch_table(items[0]) if table_name == "" && items.size > 0

    if table_name == ""
      puts "未匹配,不验证"
      return true
    end
    puts "验证:#{table_name}"

    # Look the name up first: const_get(nil) raises TypeError for a table
    # that was never registered.
    class_name = ($map_models || {})[table_name]
    kclass = class_name && Object.const_get(class_name)
    if kclass.nil?
      puts "ERROR #{table_name} 验证 not Exist!"
      return true
    end

    items.all? { |item| kclass.new(item).verify }
  end

  # Finds the registered table whose model fields are exactly the keys of
  # +item+ (compared as strings, order ignored).
  #
  # @param item [Hash] a single item
  # @return [String] the matching table name, or "" when none matches
  def self.switch_table(item={})
    # The JSON round trip normalizes symbol keys to strings.
    item_keys = JSON.parse(item.to_json).keys

    ($map_models || {}).each do |table, class_name|
      klass_keys = Object.const_get(class_name).fields
      same_keys = (item_keys - klass_keys).empty? && item_keys.size == klass_keys.size
      return table if same_keys
    end

    ""
  end
end
data/ruby_sdk_test.rb CHANGED
@@ -10,6 +10,11 @@ def main
10
10
 
11
11
  CrawlabRubySdk.save_item(a)
12
12
 
13
+ # CrawlabRubySdk.save_item(a,"thinktank_experts")
14
+ # CrawlabRubySdk.save_item(a,"thinktank_expert_reports")
15
+ # CrawlabRubySdk.save_item(a,"thinktank_informations")
16
+ # CrawlabRubySdk.save_item(a,"thinktank_reports")
17
+
13
18
  item = {
14
19
  "id": "dec9d5415409cc9275f5590c145c3ccf",
15
20
  "title": "Association of Selected State Policies and Requirements for Buprenorphine Treatment With Per Capita Months of Treatment",
@@ -50,7 +55,6 @@ def main
50
55
  }
51
56
  CrawlabRubySdk.save_item(item)
52
57
 
53
-
54
58
  end
55
59
 
56
- main
60
+ # main
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawlab_ruby_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - min
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-17 00:00:00.000000000 Z
11
+ date: 2023-07-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: grpc
@@ -90,12 +90,14 @@ files:
90
90
  - lib/entity/request_pb.rb
91
91
  - lib/entity/response_code_pb.rb
92
92
  - lib/entity/response_pb.rb
93
- - lib/entity/result_pb.rb
94
93
  - lib/entity/stream_message_code_pb.rb
95
- - lib/entity/stream_message_data_task_pb.rb
96
94
  - lib/entity/stream_message_pb.rb
97
- - lib/models/node_pb.rb
98
- - lib/models/task_pb.rb
95
+ - lib/models/base.rb
96
+ - lib/models/thinktank_expert.rb
97
+ - lib/models/thinktank_expert_report.rb
98
+ - lib/models/thinktank_information.rb
99
+ - lib/models/thinktank_report.rb
100
+ - lib/verify/verify.rb
99
101
  - ruby_sdk_test.rb
100
102
  homepage: https://github.com/rich-bro/crawlab_ruby_sdk
101
103
  licenses: []
@@ -1,15 +0,0 @@
1
- require 'google/protobuf'
2
-
3
- Google::Protobuf::DescriptorPool.generated_pool.build do
4
- add_file("entity/result.proto", :syntax => :proto3) do
5
- add_message "grpc.Result" do
6
- optional :_tid, :string, 1
7
- optional :name, :string, 2
8
- optional :age, :string, 3
9
- end
10
- end
11
- end
12
-
13
- module Grpc
14
- Result = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Result").msgclass
15
- end
@@ -1,58 +0,0 @@
1
- # # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # # source: entity/stream_message_data_task.proto
3
-
4
- # require 'google/protobuf'
5
-
6
- # Google::Protobuf::DescriptorPool.generated_pool.build do
7
- # add_file(PWD + "/proto/entity/stream_message_data_task.proto", :syntax => :proto3) do
8
- # add_message "grpc.StreamMessageDataTask" do
9
- # optional :task_id, :string, 1
10
- # optional :data, :string,:repeated, 2
11
- # end
12
- # end
13
- # end
14
-
15
- # module Grpc
16
- # StreamMessageDataTask = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.StreamMessageDataTask").msgclass
17
- # end
18
-
19
-
20
- # frozen_string_literal: true
21
- # Generated by the protocol buffer compiler. DO NOT EDIT!
22
- # source: entity/stream_message_data_task.proto
23
-
24
- require 'google/protobuf'
25
-
26
- # require 'entity/result_pb'
27
-
28
-
29
- descriptor_data = "\n%entity/stream_message_data_task.proto\x12\x04grpc\x1a\x13\x65ntity/result.proto\"D\n\x15StreamMessageDataTask\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x1a\n\x04\x64\x61ta\x18\x02 \x03(\x0b\x32\x0c.grpc.ResultB\x08Z\x06.;grpcb\x06proto3"
30
-
31
- pool = Google::Protobuf::DescriptorPool.generated_pool
32
-
33
- begin
34
- pool.add_serialized_file(descriptor_data)
35
- rescue TypeError => e
36
- # Compatibility code: will be removed in the next major version.
37
- require 'google/protobuf/descriptor_pb'
38
- parsed = Google::Protobuf::FileDescriptorProto.decode(descriptor_data)
39
- parsed.clear_dependency
40
- serialized = parsed.class.encode(parsed)
41
- file = pool.add_serialized_file(serialized)
42
- warn "Warning: Protobuf detected an import path issue while loading generated file #{__FILE__}"
43
- imports = [
44
- ["grpc.Result", "entity/result.proto"],
45
- ]
46
- imports.each do |type_name, expected_filename|
47
- import_file = pool.lookup(type_name).file_descriptor
48
- if import_file.name != expected_filename
49
- warn "- #{file.name} imports #{expected_filename}, but that import was loaded as #{import_file.name}"
50
- end
51
- end
52
- warn "Each proto file must use a consistent fully-qualified name."
53
- warn "This will become an error in the next major version."
54
- end
55
-
56
- module Grpc
57
- StreamMessageDataTask = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.StreamMessageDataTask").msgclass
58
- end
@@ -1,27 +0,0 @@
1
- # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # source: models/node.proto
3
-
4
- require 'google/protobuf'
5
-
6
- Google::Protobuf::DescriptorPool.generated_pool.build do
7
- add_file("proto/models/node.proto", :syntax => :proto3) do
8
- add_message "grpc.Node" do
9
- optional :_id, :string, 1
10
- optional :name, :string, 2
11
- optional :ip, :string, 3
12
- optional :port, :string, 5
13
- optional :mac, :string, 6
14
- optional :hostname, :string, 7
15
- optional :description, :string, 8
16
- optional :key, :string, 9
17
- optional :is_master, :bool, 11
18
- optional :update_ts, :string, 12
19
- optional :create_ts, :string, 13
20
- optional :update_ts_unix, :int64, 14
21
- end
22
- end
23
- end
24
-
25
- module Grpc
26
- Node = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Node").msgclass
27
- end
@@ -1,26 +0,0 @@
1
- # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # source: models/task.proto
3
-
4
- require 'google/protobuf'
5
-
6
- Google::Protobuf::DescriptorPool.generated_pool.build do
7
- add_file("models/task.proto", :syntax => :proto3) do
8
- add_message "grpc.Task" do
9
- optional :_id, :string, 1
10
- optional :spider_id, :string, 2
11
- optional :status, :string, 5
12
- optional :node_id, :string, 6
13
- optional :cmd, :string, 8
14
- optional :param, :string, 9
15
- optional :error, :string, 10
16
- optional :pid, :int32, 16
17
- optional :run_type, :string, 17
18
- optional :schedule_id, :string, 18
19
- optional :type, :string, 19
20
- end
21
- end
22
- end
23
-
24
- module Grpc
25
- Task = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Task").msgclass
26
- end