crawlab_ruby_sdk 0.1.6 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3fc9c6d1277660d63c17811f472dd56d438a4998c6d0c03e99d1a9e5c0357868
4
- data.tar.gz: 20b8ffa1ba0b1275c66f1a79cc80a479d39211b781174107a4cc21bf8ca86fb1
3
+ metadata.gz: c5c857dd2f243d47318786c542faec1a9fc7a6a52bba0edf7523313379b4f2d4
4
+ data.tar.gz: fbbacb701ad2bd5d090d85ba3322cd82a5c02f5d7d0d3ab729ee2b2b0d0882c1
5
5
  SHA512:
6
- metadata.gz: 5b925b182c7fa7fcbd8d7c9d3a2883e8acee1fa41eafc01a28aaa1ba78c037f8d17f1fed831846001b1761b1480854e908732ac5e8e3b500828267baebc1721c
7
- data.tar.gz: ae63a9d061c104cf8ad8c3e9c4d72b1a70f9f21dbe4c7b07d299ee5cfafe048a008f16ee26ed94efda8b64542aed817f33c2dbdd0323cab28d0c2ce1d8cd67ea
6
+ metadata.gz: 0f9752ef0ad2e59d72fa870a6493a8d831716c0422bb4d6d08a0afc3cadc08ee9b4b80eb97af5192ab1094ad7884533b3c7c75cc76d1a47fe7c0b90f991538d0
7
+ data.tar.gz: 47e6a0373338ad0d32b8286d2b422da55a67d5728655fca51a028a17928ee301b981d17cb396e06d23f8911ac37663b471360f8ecce97e55b7f8595e4e8096c1
data/README.md CHANGED
@@ -39,6 +39,19 @@ Or install it yourself as:
39
39
  puts bucket_url
40
40
  ```
41
41
 
42
+ ##### 字段验证规则
43
+
44
+ ```
45
+ CrawlabRubySdk.save_item({name: "haha",age:12},table_name)
46
+ CrawlabRubySdk.save_items([{name: "haha",age:12},{name:"456",age:34}],table_name)
47
+ table_name 可以是这几个["thinktank_expert_reports", "thinktank_informations", "thinktank_experts", "thinktank_reports"] 中的一个,也可以不传递
48
+
49
+ 1、如果当前采集的结果 存储的数据表是上面几个中的一个,无论是否传递,都会按照对应的表结构的 字段规则验证字段
50
+ 2、如果当前采集的结果,存储的数据表不属于上面几个中的一个,则按照传递的table_name,对应的字段规则验证字段
51
+ 3、如果没有传递table_name,并且采集的结果存储临时表,则根据推送的结果的字段匹配 上面四个表的字段,匹配上了就按照匹配上的表结构的字段规则验证,没有匹配上则不验证
52
+ ```
53
+
54
+
42
55
  ## Development
43
56
 
44
57
  After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -1,3 +1,3 @@
1
1
  module CrawlabRubySdk
2
- VERSION = "0.1.6"
2
+ VERSION = "0.1.8"
3
3
  end
@@ -1,4 +1,4 @@
1
- require "crawlab_ruby_sdk/version"
1
+ # require "crawlab_ruby_sdk/version"
2
2
  require "grpc"
3
3
  require "json"
4
4
  def traverse_dir(file_path)
@@ -14,17 +14,18 @@ dir = File.expand_path("..", __FILE__)
14
14
  # puts dir
15
15
 
16
16
  traverse_dir(dir+'/entity/stream_message_code_pb.rb')
17
- traverse_dir(dir+'/entity/result_pb.rb')
18
17
  traverse_dir(dir+'/entity/stream_message_pb.rb')
19
- traverse_dir(dir+'/entity/stream_message_data_task_pb.rb')
20
18
  traverse_dir(dir+'/client')
19
+ traverse_dir(dir+'/models/base.rb')
20
+ traverse_dir(dir+'/models')
21
+ traverse_dir(dir+'/verify')
21
22
 
22
23
  module CrawlabRubySdk
23
24
  class Error < StandardError; end
24
25
  # Your code goes here...
25
26
 
26
27
 
27
- def self.save_item(item={})
28
+ def self.save_item(item={},table_name="")
28
29
  address = ENV["CRAWLAB_GRPC_ADDRESS"]
29
30
  if address==nil || address == ""
30
31
  address = "localhost:9666"
@@ -34,6 +35,11 @@ module CrawlabRubySdk
34
35
  if auth==nil || auth == ""
35
36
  auth = "Crawlab2021!"
36
37
  end
38
+
39
+ table_name = get_table_name(table_name)
40
+ if !Verify.IsVerified?([item],table_name)
41
+ return
42
+ end
37
43
 
38
44
  client = TaskServiceClient.new(address,auth)
39
45
 
@@ -42,7 +48,7 @@ module CrawlabRubySdk
42
48
  save(sub_client,[item])
43
49
  end
44
50
 
45
- def self.save_items(items=[])
51
+ def self.save_items(items=[],table_name="")
46
52
  address = ENV["CRAWLAB_GRPC_ADDRESS"]
47
53
  if address==nil || address == ""
48
54
  address = "localhost:9666"
@@ -52,6 +58,11 @@ module CrawlabRubySdk
52
58
  if auth==nil || auth == ""
53
59
  auth = "Crawlab2021!"
54
60
  end
61
+
62
+ table_name = get_table_name(table_name)
63
+ if !Verify.IsVerified?(items,table_name)
64
+ return
65
+ end
55
66
 
56
67
  client = TaskServiceClient.new(address,auth)
57
68
 
@@ -90,11 +101,8 @@ module CrawlabRubySdk
90
101
  end
91
102
 
92
103
  data = {task_id: task_id,data:records}.to_json.b
93
- # data = data.encode("utf-8")
94
- # puts data
95
104
 
96
- # data = data.encode('ASCII-8BIT', invalid: :replace, undef: :replace, replace: '')
97
- # puts data
105
+ # puts "=====#{{task_id: task_id,data:records}.to_json}==="
98
106
 
99
107
  msg = Grpc::StreamMessage.new(code:3,data:data)
100
108
 
@@ -116,4 +124,14 @@ module CrawlabRubySdk
116
124
  def self.save_file_stream_to_oss(oss_path,stream)
117
125
  OssServerClient.new.send_stream(oss_path,stream)
118
126
  end
127
+
128
# Resolves the table name whose field rules should be used for validation.
#
# The TABLE_NAME environment variable takes precedence over the argument, but
# only when it names a table registered with Verify; otherwise the caller's
# value is returned.
#
# @param table_name [String, nil] table name supplied by the caller
# @return [String] the resolved table name; "" when none was supplied
def self.get_table_name(table_name="")
  env_table = ENV["TABLE_NAME"]
  if env_table && !env_table.empty? && Verify.table_names.include?(env_table)
    return env_table
  end

  # A nil argument means "not supplied"; normalise it so callers can keep
  # comparing against "" to detect the unspecified case.
  table_name.nil? ? "" : table_name
end
119
137
  end
@@ -0,0 +1,191 @@
1
+ require 'json'
2
+
3
+ $map_models = {}
4
# Base class for table-backed models that can validate a scraped item.
#
# Subclasses override +table_name+, +verify_keys+ and +as_json+, and call
# +register+ in their class body so the model can be looked up by table name
# through the global $map_models registry.
class BaseModel
  # @return [String] name of the backing table ("" for the base class)
  def self.table_name
    ""
  end

  # Records this class in $map_models under its table name.
  def self.register
    $map_models[table_name] = name
  end

  # @return [Array<String>] the keys of an empty instance's JSON form
  def self.fields
    JSON.parse(new({}).as_json.to_json).keys
  end

  # @return [Hash{String => Array<String>}] rule names to apply per field
  def self.verify_keys
    {}
  end

  # Accepts (and ignores) an options hash so that +fields+ can instantiate
  # the base class the same way it instantiates subclasses.
  def initialize(_options = {}); end

  # @return [Hash] the model's attributes keyed by symbol
  def as_json
    {}
  end

  # Applies every rule from +verify_keys+ to the matching attribute.
  # Prints the first failure and stops.
  #
  # @return [Boolean] true when all rules pass, false on the first failure
  def verify
    json_data = as_json

    self.class.verify_keys.each do |key, fns|
      # Rule sets that are not arrays are ignored.
      next unless fns.is_a?(Array)

      value = json_data[key.to_sym]
      fns.each do |fn|
        message = rule_failure(key, fn, value)
        next unless message

        puts message
        return false
      end
    end

    true
  end

  # @return [Boolean] false for nil or "", true otherwise
  def verify_empty(v)
    !(v.nil? || v == "")
  end

  # @return [Boolean] true when +v+ can be parsed as JSON
  def verify_json(v)
    JSON.parse(v)
    true
  rescue StandardError
    # Covers both malformed JSON and non-string input (TypeError).
    false
  end

  # @return [Boolean] true when +v+ is a String
  def verify_string(v)
    v.is_a?(String)
  end

  # @return [Boolean] true when +v+ is an Integer
  def verify_int(v)
    v.is_a?(Integer)
  end

  # Checks +v+ against the pattern embedded in a "regex:<pattern>" rule.
  # The pattern is not anchored, so it may match anywhere in +v+.
  #
  # @param fn [String] rule text, e.g. 'regex:[\+|-]\d{4}'
  # @param v [Object] value under test
  # @return [Boolean] false for a malformed rule, a non-string or empty value,
  #   or when the pattern does not match
  def verify_regex(fn, v)
    return false unless fn.is_a?(String)

    # Split only on the first ':' so patterns may themselves contain ':'.
    pattern = fn.split(":", 2)[1]
    return false if pattern.nil? || pattern.empty?
    return false unless v.is_a?(String) && !v.empty?

    Regexp.new(pattern).match?(v)
  end

  # Checks that the string form of +v+ has exactly the length given in a
  # "length:<n>" rule.
  #
  # @return [Boolean] false for a malformed rule, a zero length or a nil value
  def verify_length(fn, v)
    return false unless fn.is_a?(String)

    length = fn.split(":", 2)[1].to_i
    return false if length.zero? || v.nil?

    v.to_s.size == length
  end

  # Checks that every element of the JSON document in +v+ has a non-nil
  # value for each field listed in a "fields:a,b,c" rule.
  #
  # @return [Boolean] false for a malformed rule, unparsable input or a
  #   missing field
  def verify_fields(fn, v)
    return false unless fn.is_a?(String)

    fields_str = fn.split(":", 2)[1]
    return false if fields_str.nil? || fields_str.empty?

    fields = fields_str.split(",")

    begin
      JSON.parse(v).each do |data|
        fields.each do |field|
          if data[field].nil?
            puts "ERROR #{field} not Exist!"
            return false
          end
        end
      end
    rescue StandardError => e
      puts e
      return false
    end

    true
  end

  private

  # Runs a single rule and returns the error message to print when it fails,
  # or nil when the rule passes or is not recognised.
  def rule_failure(key, fn, value)
    if fn == "empty"
      "ERROR: #{key} cannot be empty!" unless verify_empty(value)
    elsif fn == "json"
      "ERROR: #{key} json string parse fail!" unless verify_json(value)
    elsif fn == "string"
      "ERROR: #{key} field type is not string!" unless verify_string(value)
    elsif fn == "int"
      "ERROR: #{key} field type is not int!" unless verify_int(value)
    elsif fn.include?("regex")
      "ERROR: #{key} regex #{fn} match error" unless verify_regex(fn, value)
    elsif fn.include?("length")
      "ERROR: #{key} length must be #{fn}" unless verify_length(fn, value)
    elsif fn.include?("fields")
      "ERROR: #{key}:#{fn} not Exist!" unless verify_fields(fn, value)
    end
  end
end
@@ -0,0 +1,140 @@
1
# Model for rows of the "thinktank_experts" table.
#
# The attribute list is declared once in FIELDS; the accessors, the
# constructor and +as_json+ are all derived from it so they cannot drift
# apart.
class ThinktankExpert < BaseModel
  # Attribute names, in the order they appear in +as_json+.
  FIELDS = %i[
    id
    name
    title
    content
    location
    area_of_expertise
    profile_images
    phone
    email
    link
    audios
    videos
    education
    related_topics
    site_name
    site_name_cn
    domain
    created_at
    updated_at
    source
    oss_profile_images
    facebook
    twitter
    linkedin
    instagram
    wikidata
    person_type
    files
    oss_files
    associated_program
    lang
    website
    nationalities
  ].freeze

  attr_accessor(*FIELDS)

  # @return [String] name of the backing table
  def self.table_name
    "thinktank_experts"
  end
  register

  # @param options [Hash] attribute values keyed by string or symbol;
  #   unknown keys are ignored and missing ones are left nil
  def initialize(options = {})
    # Round-trip through JSON so symbol and string keys are treated alike.
    options = JSON.parse(options.to_json)
    FIELDS.each { |field| instance_variable_set("@#{field}", options[field.to_s]) }
  end

  # @return [Hash{Symbol => Object}] every attribute, including nil ones
  def as_json
    FIELDS.each_with_object({}) do |field, json|
      json[field] = instance_variable_get("@#{field}")
    end
  end

  # Accepts the generator arguments the JSON library passes when this object
  # is nested inside an Array or Hash being serialised.
  #
  # @return [String] JSON text of +as_json+
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{String => Array<String>}] validation rules per field
  def self.verify_keys
    {
      "id" => ["empty", "string"],
      # NOTE(review): "title" is validated as a JSON string here, unlike the
      # report models where it is a plain non-empty string - confirm intent.
      "title" => ["json", "string"],
      "name" => ["empty", "string"],
      "site_name_cn" => ["empty", "string"],
      "site_name" => ["empty", "string"],
      "source" => ["empty", "string"],
      "audios" => ["json", "string"],
      "videos" => ["json", "string"],
      "related_topics" => ["json", "string"],
      "files" => ["json", "string"],
      "oss_files" => ["json", "string"],
      "domain" => ["empty", "string"],
      "created_at" => ["empty", "int", "length:13"],
      "updated_at" => ["empty", "int", "length:13"],
    }
  end
end
@@ -0,0 +1,160 @@
1
# Model for rows of the "thinktank_expert_reports" table.
#
# The attribute list is declared once in FIELDS; the accessors, the
# constructor and +as_json+ are all derived from it so they cannot drift
# apart.
class ThinktankExpertReport < BaseModel
  # Attribute names, in the order they appear in +as_json+.
  FIELDS = %i[
    id
    title
    site_name
    site_name_cn
    abstract
    content
    author_names
    source
    files
    images
    videos
    audios
    links
    domain
    keywords
    html_content
    lang
    country_cn
    country_code
    created_at
    updated_at
    created_time
    oss_files
    oss_images
    customer_category
    category
    topics
    tags
    views
    comments
    reference
    mention_country
    authors
    sub_title
    timezone
    timezone_location
  ].freeze

  attr_accessor(*FIELDS)

  # @return [String] name of the backing table
  def self.table_name
    "thinktank_expert_reports"
  end
  register

  # @param options [Hash] attribute values keyed by string or symbol;
  #   unknown keys are ignored and missing ones are left nil
  def initialize(options = {})
    # Round-trip through JSON so symbol and string keys are treated alike.
    options = JSON.parse(options.to_json)
    FIELDS.each { |field| instance_variable_set("@#{field}", options[field.to_s]) }
  end

  # @return [Hash{Symbol => Object}] every attribute, including nil ones
  def as_json
    FIELDS.each_with_object({}) do |field, json|
      json[field] = instance_variable_get("@#{field}")
    end
  end

  # Accepts the generator arguments the JSON library passes when this object
  # is nested inside an Array or Hash being serialised.
  #
  # @return [String] JSON text of +as_json+
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{String => Array<String>}] validation rules per field
  def self.verify_keys
    {
      "id" => ["empty", "string"],
      "title" => ["empty", "string"],
      "site_name" => ["empty", "string"],
      "site_name_cn" => ["empty", "string"],
      "content" => ["empty", "string"],
      "source" => ["empty", "string"],
      "files" => ["json", "string"],
      "images" => ["json", "string"],
      "videos" => ["json", "string"],
      "audios" => ["json", "string"],
      "links" => ["json", "string"],
      "domain" => ["empty", "string"],
      "keywords" => ["json", "string"],
      "lang" => ["empty", "string"],
      "country_cn" => ["empty", "string"],
      "country_code" => ["empty", "string"],
      "created_at" => ["empty", "int", "length:13"],
      "updated_at" => ["empty", "int", "length:13"],
      "created_time" => ["empty", "int", "length:10"],
      "oss_files" => ["json", "string"],
      "oss_images" => ["json", "string"],
      "topics" => ["json", "string"],
      "tags" => ["string", "json"],
      "authors" => ["json", "fields:author_id,author_name,author_url", "string"],
      "timezone" => ["empty", 'regex:[\+|-]\d{4}', "string"],
      "timezone_location" => ["empty", "string"],
    }
  end
end
@@ -0,0 +1,163 @@
1
# Model for rows of the "thinktank_informations" table.
#
# The attribute list is declared once in FIELDS; the accessors, the
# constructor and +as_json+ are all derived from it. Deriving +as_json+ from
# the same list guarantees that every attribute (including +audios+, which
# has a validation rule in +verify_keys+) is present in the JSON form.
class ThinktankInformation < BaseModel
  # Attribute names, in the order they appear in +as_json+.
  FIELDS = %i[
    id
    title
    site_name
    site_name_cn
    abstract
    content
    author_names
    state_info
    source
    files
    images
    videos
    audios
    links
    domain
    keywords
    html_content
    lang
    country_cn
    country_code
    created_at
    updated_at
    created_time
    oss_files
    oss_images
    customer_category
    category
    topics
    tags
    views
    comments
    reference
    mention_country
    authors
    sub_title
    timezone
    timezone_location
  ].freeze

  attr_accessor(*FIELDS)

  # @return [String] name of the backing table
  def self.table_name
    "thinktank_informations"
  end
  register

  # @param options [Hash] attribute values keyed by string or symbol;
  #   unknown keys are ignored and missing ones are left nil
  def initialize(options = {})
    # Round-trip through JSON so symbol and string keys are treated alike.
    options = JSON.parse(options.to_json)
    FIELDS.each { |field| instance_variable_set("@#{field}", options[field.to_s]) }
  end

  # @return [Hash{Symbol => Object}] every attribute, including nil ones
  def as_json
    FIELDS.each_with_object({}) do |field, json|
      json[field] = instance_variable_get("@#{field}")
    end
  end

  # Accepts the generator arguments the JSON library passes when this object
  # is nested inside an Array or Hash being serialised.
  #
  # @return [String] JSON text of +as_json+
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{String => Array<String>}] validation rules per field
  def self.verify_keys
    {
      "id" => ["empty", "string"],
      "title" => ["empty", "string"],
      "site_name" => ["empty", "string"],
      "site_name_cn" => ["empty", "string"],
      "content" => ["empty", "string"],
      "source" => ["empty", "string"],
      "files" => ["json", "string"],
      "images" => ["json", "string"],
      "videos" => ["json", "string"],
      "audios" => ["json", "string"],
      "links" => ["json", "string"],
      "domain" => ["empty", "string"],
      "keywords" => ["json", "string"],
      "lang" => ["empty", "string"],
      "country_cn" => ["empty", "string"],
      "country_code" => ["empty", "string"],
      "created_at" => ["empty", "int", "length:13"],
      "updated_at" => ["empty", "int", "length:13"],
      "created_time" => ["empty", "int", "length:10"],
      "oss_files" => ["json", "string"],
      "oss_images" => ["json", "string"],
      "topics" => ["json", "string"],
      "tags" => ["string", "json"],
      "authors" => ["json", "fields:author_id,author_name,author_url", "string"],
      "timezone" => ["empty", 'regex:[\+|-]\d{4}', "string"],
      "timezone_location" => ["empty", "string"],
    }
  end
end
@@ -0,0 +1,161 @@
1
# Model for rows of the "thinktank_reports" table.
#
# The attribute list is declared once in FIELDS; the accessors, the
# constructor and +as_json+ are all derived from it so they cannot drift
# apart.
class ThinktankReport < BaseModel
  # Attribute names, in the order they appear in +as_json+.
  FIELDS = %i[
    id
    title
    site_name
    site_name_cn
    abstract
    content
    author_names
    source
    files
    images
    videos
    audios
    links
    domain
    keywords
    html_content
    lang
    country_cn
    country_code
    created_at
    updated_at
    created_time
    oss_files
    oss_images
    customer_category
    category
    topics
    tags
    views
    comments
    reference
    mention_country
    authors
    sub_title
    timezone
    timezone_location
  ].freeze

  attr_accessor(*FIELDS)

  # @return [String] name of the backing table
  def self.table_name
    "thinktank_reports"
  end
  register

  # @param options [Hash] attribute values keyed by string or symbol;
  #   unknown keys are ignored and missing ones are left nil
  def initialize(options = {})
    # Round-trip through JSON so symbol and string keys are treated alike.
    options = JSON.parse(options.to_json)
    FIELDS.each { |field| instance_variable_set("@#{field}", options[field.to_s]) }
  end

  # Accepts the generator arguments the JSON library passes when this object
  # is nested inside an Array or Hash being serialised.
  #
  # @return [String] JSON text of +as_json+
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{Symbol => Object}] every attribute, including nil ones
  def as_json
    FIELDS.each_with_object({}) do |field, json|
      json[field] = instance_variable_get("@#{field}")
    end
  end

  # @return [Hash{String => Array<String>}] validation rules per field
  def self.verify_keys
    {
      "id" => ["empty", "string"],
      "title" => ["empty", "string"],
      "site_name" => ["empty", "string"],
      "site_name_cn" => ["empty", "string"],
      "content" => ["empty", "string"],
      "source" => ["empty", "string"],
      "files" => ["json", "string"],
      "images" => ["json", "string"],
      "videos" => ["json", "string"],
      "audios" => ["json", "string"],
      "links" => ["json", "string"],
      "domain" => ["empty", "string"],
      "keywords" => ["json", "string"],
      "lang" => ["empty", "string"],
      "country_cn" => ["empty", "string"],
      "country_code" => ["empty", "string"],
      "created_at" => ["empty", "int", "length:13"],
      "updated_at" => ["empty", "int", "length:13"],
      "created_time" => ["empty", "int", "length:10"],
      "oss_files" => ["json", "string"],
      "oss_images" => ["json", "string"],
      "topics" => ["json", "string"],
      "tags" => ["string", "json"],
      "authors" => ["json", "fields:author_id,author_name,author_url", "string"],
      "timezone" => ["empty", 'regex:[\+|-]\d{4}', "string"],
      "timezone_location" => ["empty", "string"],
    }
  end
end
@@ -0,0 +1,93 @@
1
# Validates scraped items against the model registered for a table in the
# global $map_models registry (table name => model class name).
class Verify
  # Decides whether +items+ may be saved.
  #
  # When no table name is given, the table is guessed from the keys of the
  # first item; if nothing matches, validation is skipped. When a table name
  # is given, the first item's keys must match that table's fields exactly.
  #
  # @param items [Array<Hash>] items about to be saved
  # @param table_name [String] target table, or "" to auto-detect
  # @return [Boolean] true when the items pass (or validation is skipped),
  #   false when a field mismatch or rule failure is found
  def self.IsVerified?(items=[],table_name="")
    unless items.empty?
      if table_name == ""
        table_name = switch_table(items[0])
      elsif !match_fields?(items[0], table_name)
        return false
      end
    end

    if table_name == ""
      puts "未匹配,不验证"
      return true
    end
    puts "验证:#{table_name}"

    # Look the model up before resolving the constant: const_get(nil) raises,
    # so an unregistered table has to be detected on the registry value.
    model_name = $map_models[table_name]
    if model_name.nil?
      puts "ERROR #{table_name} 验证 not Exist!"
      return true
    end

    kclass = Object.const_get(model_name)
    items.all? { |item| kclass.new(item).verify }
  end

  # Finds the registered table whose field set equals the item's key set.
  #
  # @param item [Hash] a single scraped item
  # @return [String] the matching table name, or "" when none matches
  def self.switch_table(item={})
    item_keys = JSON.parse(item.to_json).keys

    match = $map_models.find do |_table, model_name|
      klass_keys = Object.const_get(model_name).fields
      (item_keys - klass_keys).empty? && item_keys.size == klass_keys.size
    end

    match ? match.first : ""
  end

  # Checks that the item's keys are exactly the fields of +table_name+,
  # printing which keys are surplus or missing.
  #
  # @param item [Hash] a single scraped item
  # @param table_name [String] table to compare against
  # @return [Boolean] true only on an exact match
  def self.match_fields?(item={},table_name)
    return false if table_name.nil? || table_name == ""

    item_keys = JSON.parse(item.to_json).keys
    model_name = $map_models[table_name]

    if model_name.nil?
      puts "ERROR #{table_name} not Exist!"
      return false
    end

    klass_keys = Object.const_get(model_name).fields

    surplus = item_keys - klass_keys
    unless surplus.empty?
      puts "ERROR #{surplus} do not belong #{table_name}"
      return false
    end

    missing = klass_keys - item_keys
    unless missing.empty?
      puts "ERROR #{missing} do not Exist!"
      return false
    end

    true
  end

  # @return [Array<String>] every registered table name
  def self.table_names
    $map_models.keys
  end
end
data/ruby_sdk_test.rb CHANGED
@@ -10,6 +10,11 @@ def main
10
10
 
11
11
  CrawlabRubySdk.save_item(a)
12
12
 
13
+ # CrawlabRubySdk.save_item(a,"thinktank_experts")
14
+ # CrawlabRubySdk.save_item(a,"thinktank_expert_reports")
15
+ # CrawlabRubySdk.save_item(a,"thinktank_informations")
16
+ # CrawlabRubySdk.save_item(a,"thinktank_reports")
17
+
13
18
  item = {
14
19
  "id": "dec9d5415409cc9275f5590c145c3ccf",
15
20
  "title": "Association of Selected State Policies and Requirements for Buprenorphine Treatment With Per Capita Months of Treatment",
@@ -50,7 +55,6 @@ def main
50
55
  }
51
56
  CrawlabRubySdk.save_item(item)
52
57
 
53
-
54
58
  end
55
59
 
56
- main
60
+ # main
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawlab_ruby_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - min
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-17 00:00:00.000000000 Z
11
+ date: 2023-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: grpc
@@ -90,12 +90,14 @@ files:
90
90
  - lib/entity/request_pb.rb
91
91
  - lib/entity/response_code_pb.rb
92
92
  - lib/entity/response_pb.rb
93
- - lib/entity/result_pb.rb
94
93
  - lib/entity/stream_message_code_pb.rb
95
- - lib/entity/stream_message_data_task_pb.rb
96
94
  - lib/entity/stream_message_pb.rb
97
- - lib/models/node_pb.rb
98
- - lib/models/task_pb.rb
95
+ - lib/models/base.rb
96
+ - lib/models/thinktank_expert.rb
97
+ - lib/models/thinktank_expert_report.rb
98
+ - lib/models/thinktank_information.rb
99
+ - lib/models/thinktank_report.rb
100
+ - lib/verify/verify.rb
99
101
  - ruby_sdk_test.rb
100
102
  homepage: https://github.com/rich-bro/crawlab_ruby_sdk
101
103
  licenses: []
@@ -1,15 +0,0 @@
1
- require 'google/protobuf'
2
-
3
- Google::Protobuf::DescriptorPool.generated_pool.build do
4
- add_file("entity/result.proto", :syntax => :proto3) do
5
- add_message "grpc.Result" do
6
- optional :_tid, :string, 1
7
- optional :name, :string, 2
8
- optional :age, :string, 3
9
- end
10
- end
11
- end
12
-
13
- module Grpc
14
- Result = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Result").msgclass
15
- end
@@ -1,58 +0,0 @@
1
- # # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # # source: entity/stream_message_data_task.proto
3
-
4
- # require 'google/protobuf'
5
-
6
- # Google::Protobuf::DescriptorPool.generated_pool.build do
7
- # add_file(PWD + "/proto/entity/stream_message_data_task.proto", :syntax => :proto3) do
8
- # add_message "grpc.StreamMessageDataTask" do
9
- # optional :task_id, :string, 1
10
- # optional :data, :string,:repeated, 2
11
- # end
12
- # end
13
- # end
14
-
15
- # module Grpc
16
- # StreamMessageDataTask = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.StreamMessageDataTask").msgclass
17
- # end
18
-
19
-
20
- # frozen_string_literal: true
21
- # Generated by the protocol buffer compiler. DO NOT EDIT!
22
- # source: entity/stream_message_data_task.proto
23
-
24
- require 'google/protobuf'
25
-
26
- # require 'entity/result_pb'
27
-
28
-
29
- descriptor_data = "\n%entity/stream_message_data_task.proto\x12\x04grpc\x1a\x13\x65ntity/result.proto\"D\n\x15StreamMessageDataTask\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x1a\n\x04\x64\x61ta\x18\x02 \x03(\x0b\x32\x0c.grpc.ResultB\x08Z\x06.;grpcb\x06proto3"
30
-
31
- pool = Google::Protobuf::DescriptorPool.generated_pool
32
-
33
- begin
34
- pool.add_serialized_file(descriptor_data)
35
- rescue TypeError => e
36
- # Compatibility code: will be removed in the next major version.
37
- require 'google/protobuf/descriptor_pb'
38
- parsed = Google::Protobuf::FileDescriptorProto.decode(descriptor_data)
39
- parsed.clear_dependency
40
- serialized = parsed.class.encode(parsed)
41
- file = pool.add_serialized_file(serialized)
42
- warn "Warning: Protobuf detected an import path issue while loading generated file #{__FILE__}"
43
- imports = [
44
- ["grpc.Result", "entity/result.proto"],
45
- ]
46
- imports.each do |type_name, expected_filename|
47
- import_file = pool.lookup(type_name).file_descriptor
48
- if import_file.name != expected_filename
49
- warn "- #{file.name} imports #{expected_filename}, but that import was loaded as #{import_file.name}"
50
- end
51
- end
52
- warn "Each proto file must use a consistent fully-qualified name."
53
- warn "This will become an error in the next major version."
54
- end
55
-
56
- module Grpc
57
- StreamMessageDataTask = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.StreamMessageDataTask").msgclass
58
- end
@@ -1,27 +0,0 @@
1
- # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # source: models/node.proto
3
-
4
- require 'google/protobuf'
5
-
6
- Google::Protobuf::DescriptorPool.generated_pool.build do
7
- add_file("proto/models/node.proto", :syntax => :proto3) do
8
- add_message "grpc.Node" do
9
- optional :_id, :string, 1
10
- optional :name, :string, 2
11
- optional :ip, :string, 3
12
- optional :port, :string, 5
13
- optional :mac, :string, 6
14
- optional :hostname, :string, 7
15
- optional :description, :string, 8
16
- optional :key, :string, 9
17
- optional :is_master, :bool, 11
18
- optional :update_ts, :string, 12
19
- optional :create_ts, :string, 13
20
- optional :update_ts_unix, :int64, 14
21
- end
22
- end
23
- end
24
-
25
- module Grpc
26
- Node = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Node").msgclass
27
- end
@@ -1,26 +0,0 @@
1
- # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # source: models/task.proto
3
-
4
- require 'google/protobuf'
5
-
6
- Google::Protobuf::DescriptorPool.generated_pool.build do
7
- add_file("models/task.proto", :syntax => :proto3) do
8
- add_message "grpc.Task" do
9
- optional :_id, :string, 1
10
- optional :spider_id, :string, 2
11
- optional :status, :string, 5
12
- optional :node_id, :string, 6
13
- optional :cmd, :string, 8
14
- optional :param, :string, 9
15
- optional :error, :string, 10
16
- optional :pid, :int32, 16
17
- optional :run_type, :string, 17
18
- optional :schedule_id, :string, 18
19
- optional :type, :string, 19
20
- end
21
- end
22
- end
23
-
24
- module Grpc
25
- Task = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Task").msgclass
26
- end