crawlab_ruby_sdk 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3fc9c6d1277660d63c17811f472dd56d438a4998c6d0c03e99d1a9e5c0357868
4
- data.tar.gz: 20b8ffa1ba0b1275c66f1a79cc80a479d39211b781174107a4cc21bf8ca86fb1
3
+ metadata.gz: c5c857dd2f243d47318786c542faec1a9fc7a6a52bba0edf7523313379b4f2d4
4
+ data.tar.gz: fbbacb701ad2bd5d090d85ba3322cd82a5c02f5d7d0d3ab729ee2b2b0d0882c1
5
5
  SHA512:
6
- metadata.gz: 5b925b182c7fa7fcbd8d7c9d3a2883e8acee1fa41eafc01a28aaa1ba78c037f8d17f1fed831846001b1761b1480854e908732ac5e8e3b500828267baebc1721c
7
- data.tar.gz: ae63a9d061c104cf8ad8c3e9c4d72b1a70f9f21dbe4c7b07d299ee5cfafe048a008f16ee26ed94efda8b64542aed817f33c2dbdd0323cab28d0c2ce1d8cd67ea
6
+ metadata.gz: 0f9752ef0ad2e59d72fa870a6493a8d831716c0422bb4d6d08a0afc3cadc08ee9b4b80eb97af5192ab1094ad7884533b3c7c75cc76d1a47fe7c0b90f991538d0
7
+ data.tar.gz: 47e6a0373338ad0d32b8286d2b422da55a67d5728655fca51a028a17928ee301b981d17cb396e06d23f8911ac37663b471360f8ecce97e55b7f8595e4e8096c1
data/README.md CHANGED
@@ -39,6 +39,19 @@ Or install it yourself as:
39
39
  puts bucket_url
40
40
  ```
41
41
 
42
+ ##### 字段验证规则
43
+
44
+ ```
45
+ CrawlabRubySdk.save_item({name: "haha",age:12},table_name)
46
+ CrawlabRubySdk.save_items([{name: "haha",age:12},{name:"456",age:34}],table_name)
47
+ table_name 可以是这几个["thinktank_expert_reports", "thinktank_informations", "thinktank_experts", "thinktank_reports"] 中的一个,也可以不传递
48
+
49
+ 1、如果当前采集的结果 存储的数据表是上面几个中的一个,无论是否传递,都会按照对应的表结构的 字段规则验证字段
50
+ 2、如果当前采集的结果,存储的数据表不属于上面几个中的一个,则按照传递的table_name,对应的字段规则验证字段
51
+ 3、如果没有传递table_name,并且采集的结果存储临时表,则根据推送的结果的字段匹配 上面四个表的字段,匹配上了就按照匹配上的表结构的字段规则验证,没有匹配上则不验证
52
+ ```
53
+
54
+
42
55
  ## Development
43
56
 
44
57
  After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -1,3 +1,3 @@
1
1
  module CrawlabRubySdk
2
- VERSION = "0.1.6"
2
+ VERSION = "0.1.8"
3
3
  end
@@ -1,4 +1,4 @@
1
- require "crawlab_ruby_sdk/version"
1
+ # require "crawlab_ruby_sdk/version"
2
2
  require "grpc"
3
3
  require "json"
4
4
  def traverse_dir(file_path)
@@ -14,17 +14,18 @@ dir = File.expand_path("..", __FILE__)
14
14
  # puts dir
15
15
 
16
16
  traverse_dir(dir+'/entity/stream_message_code_pb.rb')
17
- traverse_dir(dir+'/entity/result_pb.rb')
18
17
  traverse_dir(dir+'/entity/stream_message_pb.rb')
19
- traverse_dir(dir+'/entity/stream_message_data_task_pb.rb')
20
18
  traverse_dir(dir+'/client')
19
+ traverse_dir(dir+'/models/base.rb')
20
+ traverse_dir(dir+'/models')
21
+ traverse_dir(dir+'/verify')
21
22
 
22
23
  module CrawlabRubySdk
23
24
  class Error < StandardError; end
24
25
  # Your code goes here...
25
26
 
26
27
 
27
- def self.save_item(item={})
28
+ def self.save_item(item={},table_name="")
28
29
  address = ENV["CRAWLAB_GRPC_ADDRESS"]
29
30
  if address==nil || address == ""
30
31
  address = "localhost:9666"
@@ -34,6 +35,11 @@ module CrawlabRubySdk
34
35
  if auth==nil || auth == ""
35
36
  auth = "Crawlab2021!"
36
37
  end
38
+
39
+ table_name = get_table_name(table_name)
40
+ if !Verify.IsVerified?([item],table_name)
41
+ return
42
+ end
37
43
 
38
44
  client = TaskServiceClient.new(address,auth)
39
45
 
@@ -42,7 +48,7 @@ module CrawlabRubySdk
42
48
  save(sub_client,[item])
43
49
  end
44
50
 
45
- def self.save_items(items=[])
51
+ def self.save_items(items=[],table_name="")
46
52
  address = ENV["CRAWLAB_GRPC_ADDRESS"]
47
53
  if address==nil || address == ""
48
54
  address = "localhost:9666"
@@ -52,6 +58,11 @@ module CrawlabRubySdk
52
58
  if auth==nil || auth == ""
53
59
  auth = "Crawlab2021!"
54
60
  end
61
+
62
+ table_name = get_table_name(table_name)
63
+ if !Verify.IsVerified?(items,table_name)
64
+ return
65
+ end
55
66
 
56
67
  client = TaskServiceClient.new(address,auth)
57
68
 
@@ -90,11 +101,8 @@ module CrawlabRubySdk
90
101
  end
91
102
 
92
103
  data = {task_id: task_id,data:records}.to_json.b
93
- # data = data.encode("utf-8")
94
- # puts data
95
104
 
96
- # data = data.encode('ASCII-8BIT', invalid: :replace, undef: :replace, replace: '')
97
- # puts data
105
+ # puts "=====#{{task_id: task_id,data:records}.to_json}==="
98
106
 
99
107
  msg = Grpc::StreamMessage.new(code:3,data:data)
100
108
 
@@ -116,4 +124,14 @@ module CrawlabRubySdk
116
124
  def self.save_file_stream_to_oss(oss_path,stream)
117
125
  OssServerClient.new.send_stream(oss_path,stream)
118
126
  end
127
+
128
# Picks the table name used for field validation.
#
# The TABLE_NAME environment variable takes precedence, but only when it is
# set, non-empty and one of the names reported by Verify.table_names.
# Otherwise the caller-supplied +table_name+ is returned unchanged.
def self.get_table_name(table_name="")
  known_tables = Verify.table_names
  from_env = ENV["TABLE_NAME"]

  env_usable = !from_env.nil? && from_env != "" && known_tables.include?(from_env)
  env_usable ? from_env : table_name
end
119
137
  end
@@ -0,0 +1,191 @@
1
+ require 'json'
2
+
3
+ $map_models = {}
4
# Base class for table models whose records are validated field by field.
#
# Subclasses override .table_name, .verify_keys, #initialize and #as_json.
# .verify_keys maps a field name to an Array of rule strings:
#   "empty"          value must not be nil or ""
#   "json"           value must parse with JSON.parse
#   "string"         value must be a String
#   "int"            value must be an Integer
#   "regex:<source>" value must be a String matching <source> (unanchored)
#   "length:<n>"     value.to_s must be exactly <n> characters long
#   "fields:a,b"     value must be a JSON array whose elements all have keys a and b
class BaseModel
  # Table this model describes; the base class returns "".
  def self.table_name
    ""
  end

  # Stores this class's name in the global $map_models hash under its table name.
  def self.register
    $map_models[table_name] = name
  end

  # Field names of the model as strings, taken from the keys of #as_json on an
  # instance built from an empty hash.
  def self.fields
    new({}).as_json.keys.map(&:to_s)
  end

  # Validation rules per field; the base class has none.
  def self.verify_keys
    {}
  end

  # Accepts (and ignores) an options hash so that BaseModel.fields can
  # instantiate the base class the same way it instantiates subclasses.
  def initialize(_options = {}); end

  # Hash representation of the record; the base class has no fields.
  def as_json
    {}
  end

  # Checks every rule in .verify_keys against the matching #as_json value.
  # Prints the first failure with puts and stops there.
  #
  # @return [Boolean] true when every rule passes, false on the first failure
  def verify
    record = as_json

    self.class.verify_keys.each do |key, rules|
      # Entries whose rules are not an Array are skipped.
      next unless rules.is_a?(Array)

      value = record[key.to_sym]
      rules.each do |rule|
        failure = rule_failure(key, rule, value)
        next if failure.nil?

        puts failure
        return false
      end
    end

    true
  end

  # @return [Boolean] false for nil and "", true otherwise
  def verify_empty(v)
    !(v.nil? || v == "")
  end

  # @return [Boolean] true when JSON.parse accepts v; any StandardError
  #   (including the TypeError raised for nil) counts as a failure
  def verify_json(v)
    JSON.parse(v)
    true
  rescue StandardError
    false
  end

  # @return [Boolean] true when v is a String
  def verify_string(v)
    v.is_a?(String)
  end

  # @return [Boolean] true when v is an Integer
  def verify_int(v)
    v.is_a?(Integer)
  end

  # Matches v against the pattern that follows the first ":" in fn.
  # The pattern may itself contain ":"; it is not anchored, so rule authors
  # must add \A / \z themselves when a full-string match is required.
  #
  # @return [Boolean] false when fn is malformed, v is nil/""/not a String,
  #   or the pattern does not match
  def verify_regex(fn, v)
    return false unless fn.is_a?(String)

    _prefix, pattern = fn.split(":", 2)
    return false if pattern.nil? || pattern.empty?
    # Non-string values cannot match; Object#=~ no longer exists on Ruby 3.2+.
    return false unless v.is_a?(String) && v != ""

    Regexp.new(pattern).match?(v)
  end

  # Compares v.to_s.size with the number that follows the first ":" in fn.
  #
  # @return [Boolean] false when fn is malformed, the length is 0 or not
  #   numeric, v is nil, or the sizes differ
  def verify_length(fn, v)
    return false unless fn.is_a?(String)

    _prefix, raw_length = fn.split(":", 2)
    return false if raw_length.nil?

    length = raw_length.to_i
    return false if length == 0 || v.nil?

    v.to_s.size == length
  end

  # Parses v as JSON and requires each element to have a non-nil value for
  # every comma-separated field named after the first ":" in fn.
  # Prints the missing field name, or the exception, before returning false.
  #
  # @return [Boolean]
  def verify_fields(fn, v)
    return false unless fn.is_a?(String)

    _prefix, field_list = fn.split(":", 2)
    return false if field_list.nil? || field_list.empty?

    fields = field_list.split(",")

    begin
      JSON.parse(v).each do |data|
        fields.each do |field|
          next unless data[field].nil?

          puts "ERROR #{field} not Exist!"
          return false
        end
      end
    rescue StandardError => e
      # Covers unparsable input and parsed values that are not arrays of objects.
      puts e
      return false
    end

    true
  end

  private

  # Runs one rule and returns the error message to print, or nil when the rule
  # passes or is not recognised. Parameterised rules are identified by prefix
  # so that their arguments (e.g. a field called "content_length") cannot
  # select the wrong check.
  def rule_failure(key, rule, value)
    return nil unless rule.is_a?(String)

    case rule
    when "empty"
      verify_empty(value) ? nil : "ERROR: #{key} cannot be empty!"
    when "json"
      verify_json(value) ? nil : "ERROR: #{key} json string parse fail!"
    when "string"
      verify_string(value) ? nil : "ERROR: #{key} field type is not string!"
    when "int"
      verify_int(value) ? nil : "ERROR: #{key} field type is not int!"
    when /\Aregex/
      verify_regex(rule, value) ? nil : "ERROR: #{key} regex #{rule} match error"
    when /\Alength/
      verify_length(rule, value) ? nil : "ERROR: #{key} length must be #{rule}"
    when /\Afields/
      verify_fields(rule, value) ? nil : "ERROR: #{key}:#{rule} not Exist!"
    end
  end
end
@@ -0,0 +1,140 @@
1
# Model for records stored in the "thinktank_experts" table.
#
# The attribute list is declared once in ATTRIBUTES and drives the accessors,
# the constructor and #as_json, so the three can never drift apart.
class ThinktankExpert < BaseModel
  ATTRIBUTES = %i[
    id name title content location area_of_expertise profile_images phone
    email link audios videos education related_topics site_name site_name_cn
    domain created_at updated_at source oss_profile_images facebook twitter
    linkedin instagram wikidata person_type files oss_files associated_program
    lang website nationalities
  ].freeze

  attr_accessor(*ATTRIBUTES)

  def self.table_name
    "thinktank_experts"
  end
  register

  # @param options [Hash] attribute values keyed by String or Symbol; the hash
  #   is round-tripped through JSON so keys become strings before lookup
  def initialize(options={})
    options = JSON.parse(options.to_json)
    ATTRIBUTES.each do |attribute|
      instance_variable_set("@#{attribute}", options[attribute.to_s])
    end
  end

  # @return [Hash{Symbol=>Object}] every attribute, in ATTRIBUTES order
  def as_json
    ATTRIBUTES.each_with_object({}) do |attribute, hash|
      hash[attribute] = instance_variable_get("@#{attribute}")
    end
  end

  # Accepts the optional generator arguments the JSON library passes when this
  # object is nested inside an Array or Hash being serialised.
  def to_json(*args)
    as_json.to_json(*args)
  end

  # Validation rules per field, in the format consumed by BaseModel#verify.
  def self.verify_keys
    {
      "id"=> ["empty", "string"],
      "title"=> ["json", "string"],
      "name"=> ["empty", "string"],
      "site_name_cn"=>["empty", "string"],
      "site_name"=> ["empty", "string"],
      "source"=> ["empty", "string"],
      "audios"=> ["json", "string"],
      "videos"=> ["json", "string"],
      "related_topics"=>["json", "string"],
      "files"=> ["json", "string"],
      "oss_files"=> ["json", "string"],
      "domain"=> ["empty", "string"],
      "created_at"=> ["empty", "int", "length:13"],
      "updated_at"=> ["empty", "int", "length:13"],
    }
  end
end
@@ -0,0 +1,160 @@
1
# Model for records stored in the "thinktank_expert_reports" table.
#
# The attribute list is declared once in ATTRIBUTES and drives the accessors,
# the constructor and #as_json, so the three can never drift apart.
class ThinktankExpertReport < BaseModel
  ATTRIBUTES = %i[
    id title site_name site_name_cn abstract content author_names source
    files images videos audios links domain keywords html_content lang
    country_cn country_code created_at updated_at created_time oss_files
    oss_images customer_category category topics tags views comments
    reference mention_country authors sub_title timezone timezone_location
  ].freeze

  attr_accessor(*ATTRIBUTES)

  def self.table_name
    "thinktank_expert_reports"
  end
  register

  # @param options [Hash] attribute values keyed by String or Symbol; the hash
  #   is round-tripped through JSON so keys become strings before lookup
  def initialize(options={})
    options = JSON.parse(options.to_json)
    ATTRIBUTES.each do |attribute|
      instance_variable_set("@#{attribute}", options[attribute.to_s])
    end
  end

  # @return [Hash{Symbol=>Object}] every attribute, in ATTRIBUTES order
  def as_json
    ATTRIBUTES.each_with_object({}) do |attribute, hash|
      hash[attribute] = instance_variable_get("@#{attribute}")
    end
  end

  # Accepts the optional generator arguments the JSON library passes when this
  # object is nested inside an Array or Hash being serialised.
  def to_json(*args)
    as_json.to_json(*args)
  end

  # Validation rules per field, in the format consumed by BaseModel#verify.
  def self.verify_keys
    {
      "id"=> ["empty", "string"],
      "title"=> ["empty", "string"],
      "site_name"=> ["empty", "string"],
      "site_name_cn"=> ["empty", "string"],
      "content"=> ["empty", "string"],
      "source"=> ["empty", "string"],
      "files"=> ["json", "string"],
      "images"=> ["json", "string"],
      "videos"=> ["json", "string"],
      "audios"=> ["json", "string"],
      "links"=> ["json", "string"],
      "domain"=> ["empty", "string"],
      "keywords"=> ["json", "string"],
      "lang"=> ["empty", "string"],
      "country_cn"=> ["empty", "string"],
      "country_code"=> ["empty", "string"],
      "created_at"=> ["empty", "int", "length:13"],
      "updated_at"=> ["empty", "int", "length:13"],
      "created_time"=> ["empty", "int", "length:10"],
      "oss_files"=> ["json", "string"],
      "oss_images"=> ["json", "string"],
      "topics"=> ["json", "string"],
      "tags"=> ["string", "json"],
      "authors"=> ["json", "fields:author_id,author_name,author_url", "string"],
      "timezone"=> ["empty", 'regex:[\+|-]\d{4}', "string"],
      "timezone_location"=> ["empty", "string"],
    }
  end
end
@@ -0,0 +1,163 @@
1
# Model for records stored in the "thinktank_informations" table.
#
# The attribute list is declared once in ATTRIBUTES and drives the accessors,
# the constructor and #as_json. Deriving #as_json from the same list ensures
# `audios` is serialised: when it is left out, the "audios" rule in
# .verify_keys always sees nil and every record is rejected.
class ThinktankInformation < BaseModel
  ATTRIBUTES = %i[
    id title site_name site_name_cn abstract content author_names state_info
    source files images videos audios links domain keywords html_content lang
    country_cn country_code created_at updated_at created_time oss_files
    oss_images customer_category category topics tags views comments
    reference mention_country authors sub_title timezone timezone_location
  ].freeze

  attr_accessor(*ATTRIBUTES)

  def self.table_name
    "thinktank_informations"
  end
  register

  # @param options [Hash] attribute values keyed by String or Symbol; the hash
  #   is round-tripped through JSON so keys become strings before lookup
  def initialize(options={})
    options = JSON.parse(options.to_json)
    ATTRIBUTES.each do |attribute|
      instance_variable_set("@#{attribute}", options[attribute.to_s])
    end
  end

  # @return [Hash{Symbol=>Object}] every attribute, in ATTRIBUTES order
  def as_json
    ATTRIBUTES.each_with_object({}) do |attribute, hash|
      hash[attribute] = instance_variable_get("@#{attribute}")
    end
  end

  # Accepts the optional generator arguments the JSON library passes when this
  # object is nested inside an Array or Hash being serialised.
  def to_json(*args)
    as_json.to_json(*args)
  end

  # Validation rules per field, in the format consumed by BaseModel#verify.
  def self.verify_keys
    {
      "id"=> ["empty", "string"],
      "title"=> ["empty", "string"],
      "site_name"=> ["empty", "string"],
      "site_name_cn"=> ["empty", "string"],
      "content"=> ["empty", "string"],
      "source"=> ["empty", "string"],
      "files"=> ["json", "string"],
      "images"=> ["json", "string"],
      "videos"=> ["json", "string"],
      "audios"=> ["json", "string"],
      "links"=> ["json", "string"],
      "domain"=> ["empty", "string"],
      "keywords"=> ["json", "string"],
      "lang"=> ["empty", "string"],
      "country_cn"=> ["empty", "string"],
      "country_code"=> ["empty", "string"],
      "created_at"=> ["empty", "int", "length:13"],
      "updated_at"=> ["empty", "int", "length:13"],
      "created_time"=> ["empty", "int", "length:10"],
      "oss_files"=> ["json", "string"],
      "oss_images"=> ["json", "string"],
      "topics"=> ["json", "string"],
      "tags"=> ["string", "json"],
      "authors"=> ["json", "fields:author_id,author_name,author_url", "string"],
      "timezone"=> ["empty", 'regex:[\+|-]\d{4}', "string"],
      "timezone_location"=> ["empty", "string"],
    }
  end
end
@@ -0,0 +1,161 @@
1
# Model for records stored in the "thinktank_reports" table.
#
# The attribute list is declared once in ATTRIBUTES and drives the accessors,
# the constructor and #as_json, so the three can never drift apart.
class ThinktankReport < BaseModel
  ATTRIBUTES = %i[
    id title site_name site_name_cn abstract content author_names source
    files images videos audios links domain keywords html_content lang
    country_cn country_code created_at updated_at created_time oss_files
    oss_images customer_category category topics tags views comments
    reference mention_country authors sub_title timezone timezone_location
  ].freeze

  attr_accessor(*ATTRIBUTES)

  def self.table_name
    "thinktank_reports"
  end
  register

  # @param options [Hash] attribute values keyed by String or Symbol; the hash
  #   is round-tripped through JSON so keys become strings before lookup
  def initialize(options = {})
    options = JSON.parse(options.to_json)
    ATTRIBUTES.each do |attribute|
      instance_variable_set("@#{attribute}", options[attribute.to_s])
    end
  end

  # Accepts the optional generator arguments the JSON library passes when this
  # object is nested inside an Array or Hash being serialised.
  def to_json(*args)
    as_json.to_json(*args)
  end

  # @return [Hash{Symbol=>Object}] every attribute, in ATTRIBUTES order
  def as_json
    ATTRIBUTES.each_with_object({}) do |attribute, hash|
      hash[attribute] = instance_variable_get("@#{attribute}")
    end
  end

  # Validation rules per field, in the format consumed by BaseModel#verify.
  def self.verify_keys
    {
      "id"=> ["empty", "string"],
      "title"=> ["empty", "string"],
      "site_name"=> ["empty", "string"],
      "site_name_cn"=> ["empty", "string"],
      "content"=> ["empty", "string"],
      "source"=> ["empty", "string"],
      "files"=> ["json", "string"],
      "images"=> ["json", "string"],
      "videos"=> ["json", "string"],
      "audios"=> ["json", "string"],
      "links"=> ["json", "string"],
      "domain"=> ["empty", "string"],
      "keywords"=> ["json", "string"],
      "lang"=> ["empty", "string"],
      "country_cn"=> ["empty", "string"],
      "country_code"=> ["empty", "string"],
      "created_at"=> ["empty", "int", "length:13"],
      "updated_at"=> ["empty", "int", "length:13"],
      "created_time"=> ["empty", "int", "length:10"],
      "oss_files"=> ["json", "string"],
      "oss_images"=> ["json", "string"],
      "topics"=> ["json", "string"],
      "tags"=> ["string", "json"],
      "authors"=> ["json", "fields:author_id,author_name,author_url", "string"],
      "timezone"=> ["empty", 'regex:[\+|-]\d{4}', "string"],
      "timezone_location"=> ["empty", "string"],
    }
  end
end
@@ -0,0 +1,93 @@
1
# Validates items against the model classes registered in the global
# $map_models hash (table name => class name).
class Verify
  # Decides whether +items+ may be saved.
  #
  # * With a blank +table_name+ the table is inferred from the keys of the
  #   first item (see .switch_table); if nothing matches, no validation runs.
  # * With a non-blank +table_name+ the first item's keys must equal the
  #   model's fields exactly (see .match_fields?).
  # * Every item is then checked with the model's #verify.
  #
  # @param items [Array<Hash>] records to validate
  # @param table_name [String, nil] target table; nil is treated like ""
  # @return [Boolean] false when the key check or any item's #verify fails,
  #   true otherwise (including when no model applies)
  def self.IsVerified?(items=[],table_name="")
    table_name = table_name.to_s

    unless items.empty?
      if table_name == ""
        table_name = switch_table(items[0])
      elsif !match_fields?(items[0], table_name)
        return false
      end
    end

    if table_name == ""
      puts "未匹配,不验证"
      return true
    end
    puts "验证:#{table_name}"

    # Look the class name up before resolving it: Object.const_get(nil) raises
    # TypeError, which is reachable when items is empty and the table is unknown.
    model = model_for(table_name)
    if model.nil?
      puts "ERROR #{table_name} 验证 not Exist!"
      return true
    end

    items.all? { |item| model.new(item).verify }
  end

  # Finds the registered table whose model fields are exactly the keys of +item+.
  #
  # @return [String] the matching table name, or "" when none matches
  def self.switch_table(item={})
    item_keys = JSON.parse(item.to_json).keys.sort

    match = $map_models.find do |_table, class_name|
      Object.const_get(class_name).fields.sort == item_keys
    end

    match ? match.first : ""
  end

  # Checks that the keys of +item+ are exactly the fields of the model
  # registered for +table_name+, printing what is extra or missing.
  #
  # @return [Boolean]
  def self.match_fields?(item={},table_name)
    return false if table_name.nil? || table_name == ""

    item_keys = JSON.parse(item.to_json).keys

    model = model_for(table_name)
    if model.nil?
      puts "ERROR #{table_name} not Exist!"
      return false
    end
    model_keys = model.fields

    unknown_keys = item_keys - model_keys
    unless unknown_keys.empty?
      puts "ERROR #{unknown_keys} do not belong #{table_name}"
      return false
    end

    missing_keys = model_keys - item_keys
    unless missing_keys.empty?
      puts "ERROR #{missing_keys} do not Exist!"
      return false
    end

    true
  end

  # @return [Array<String>] names of all registered tables
  def self.table_names
    $map_models.keys
  end

  # Resolves the model class registered for +table_name+, or nil if there is none.
  def self.model_for(table_name)
    class_name = $map_models[table_name]
    class_name.nil? ? nil : Object.const_get(class_name)
  end
  private_class_method :model_for
end
data/ruby_sdk_test.rb CHANGED
@@ -10,6 +10,11 @@ def main
10
10
 
11
11
  CrawlabRubySdk.save_item(a)
12
12
 
13
+ # CrawlabRubySdk.save_item(a,"thinktank_experts")
14
+ # CrawlabRubySdk.save_item(a,"thinktank_expert_reports")
15
+ # CrawlabRubySdk.save_item(a,"thinktank_informations")
16
+ # CrawlabRubySdk.save_item(a,"thinktank_reports")
17
+
13
18
  item = {
14
19
  "id": "dec9d5415409cc9275f5590c145c3ccf",
15
20
  "title": "Association of Selected State Policies and Requirements for Buprenorphine Treatment With Per Capita Months of Treatment",
@@ -50,7 +55,6 @@ def main
50
55
  }
51
56
  CrawlabRubySdk.save_item(item)
52
57
 
53
-
54
58
  end
55
59
 
56
- main
60
+ # main
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawlab_ruby_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - min
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-17 00:00:00.000000000 Z
11
+ date: 2023-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: grpc
@@ -90,12 +90,14 @@ files:
90
90
  - lib/entity/request_pb.rb
91
91
  - lib/entity/response_code_pb.rb
92
92
  - lib/entity/response_pb.rb
93
- - lib/entity/result_pb.rb
94
93
  - lib/entity/stream_message_code_pb.rb
95
- - lib/entity/stream_message_data_task_pb.rb
96
94
  - lib/entity/stream_message_pb.rb
97
- - lib/models/node_pb.rb
98
- - lib/models/task_pb.rb
95
+ - lib/models/base.rb
96
+ - lib/models/thinktank_expert.rb
97
+ - lib/models/thinktank_expert_report.rb
98
+ - lib/models/thinktank_information.rb
99
+ - lib/models/thinktank_report.rb
100
+ - lib/verify/verify.rb
99
101
  - ruby_sdk_test.rb
100
102
  homepage: https://github.com/rich-bro/crawlab_ruby_sdk
101
103
  licenses: []
@@ -1,15 +0,0 @@
1
- require 'google/protobuf'
2
-
3
- Google::Protobuf::DescriptorPool.generated_pool.build do
4
- add_file("entity/result.proto", :syntax => :proto3) do
5
- add_message "grpc.Result" do
6
- optional :_tid, :string, 1
7
- optional :name, :string, 2
8
- optional :age, :string, 3
9
- end
10
- end
11
- end
12
-
13
- module Grpc
14
- Result = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Result").msgclass
15
- end
@@ -1,58 +0,0 @@
1
- # # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # # source: entity/stream_message_data_task.proto
3
-
4
- # require 'google/protobuf'
5
-
6
- # Google::Protobuf::DescriptorPool.generated_pool.build do
7
- # add_file(PWD + "/proto/entity/stream_message_data_task.proto", :syntax => :proto3) do
8
- # add_message "grpc.StreamMessageDataTask" do
9
- # optional :task_id, :string, 1
10
- # optional :data, :string,:repeated, 2
11
- # end
12
- # end
13
- # end
14
-
15
- # module Grpc
16
- # StreamMessageDataTask = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.StreamMessageDataTask").msgclass
17
- # end
18
-
19
-
20
- # frozen_string_literal: true
21
- # Generated by the protocol buffer compiler. DO NOT EDIT!
22
- # source: entity/stream_message_data_task.proto
23
-
24
- require 'google/protobuf'
25
-
26
- # require 'entity/result_pb'
27
-
28
-
29
- descriptor_data = "\n%entity/stream_message_data_task.proto\x12\x04grpc\x1a\x13\x65ntity/result.proto\"D\n\x15StreamMessageDataTask\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x1a\n\x04\x64\x61ta\x18\x02 \x03(\x0b\x32\x0c.grpc.ResultB\x08Z\x06.;grpcb\x06proto3"
30
-
31
- pool = Google::Protobuf::DescriptorPool.generated_pool
32
-
33
- begin
34
- pool.add_serialized_file(descriptor_data)
35
- rescue TypeError => e
36
- # Compatibility code: will be removed in the next major version.
37
- require 'google/protobuf/descriptor_pb'
38
- parsed = Google::Protobuf::FileDescriptorProto.decode(descriptor_data)
39
- parsed.clear_dependency
40
- serialized = parsed.class.encode(parsed)
41
- file = pool.add_serialized_file(serialized)
42
- warn "Warning: Protobuf detected an import path issue while loading generated file #{__FILE__}"
43
- imports = [
44
- ["grpc.Result", "entity/result.proto"],
45
- ]
46
- imports.each do |type_name, expected_filename|
47
- import_file = pool.lookup(type_name).file_descriptor
48
- if import_file.name != expected_filename
49
- warn "- #{file.name} imports #{expected_filename}, but that import was loaded as #{import_file.name}"
50
- end
51
- end
52
- warn "Each proto file must use a consistent fully-qualified name."
53
- warn "This will become an error in the next major version."
54
- end
55
-
56
- module Grpc
57
- StreamMessageDataTask = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.StreamMessageDataTask").msgclass
58
- end
@@ -1,27 +0,0 @@
1
- # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # source: models/node.proto
3
-
4
- require 'google/protobuf'
5
-
6
- Google::Protobuf::DescriptorPool.generated_pool.build do
7
- add_file("proto/models/node.proto", :syntax => :proto3) do
8
- add_message "grpc.Node" do
9
- optional :_id, :string, 1
10
- optional :name, :string, 2
11
- optional :ip, :string, 3
12
- optional :port, :string, 5
13
- optional :mac, :string, 6
14
- optional :hostname, :string, 7
15
- optional :description, :string, 8
16
- optional :key, :string, 9
17
- optional :is_master, :bool, 11
18
- optional :update_ts, :string, 12
19
- optional :create_ts, :string, 13
20
- optional :update_ts_unix, :int64, 14
21
- end
22
- end
23
- end
24
-
25
- module Grpc
26
- Node = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Node").msgclass
27
- end
@@ -1,26 +0,0 @@
1
- # Generated by the protocol buffer compiler. DO NOT EDIT!
2
- # source: models/task.proto
3
-
4
- require 'google/protobuf'
5
-
6
- Google::Protobuf::DescriptorPool.generated_pool.build do
7
- add_file("models/task.proto", :syntax => :proto3) do
8
- add_message "grpc.Task" do
9
- optional :_id, :string, 1
10
- optional :spider_id, :string, 2
11
- optional :status, :string, 5
12
- optional :node_id, :string, 6
13
- optional :cmd, :string, 8
14
- optional :param, :string, 9
15
- optional :error, :string, 10
16
- optional :pid, :int32, 16
17
- optional :run_type, :string, 17
18
- optional :schedule_id, :string, 18
19
- optional :type, :string, 19
20
- end
21
- end
22
- end
23
-
24
- module Grpc
25
- Task = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Task").msgclass
26
- end