crawlab_ruby_sdk 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c85282caf96edf6a9100a70b045bb1004a471db9d2a7842f83e77a28fbde32e
4
- data.tar.gz: cc64ce0004eeede08da51a5a7eec294f0db1b494d897079562a1578128960a1e
3
+ metadata.gz: c5c857dd2f243d47318786c542faec1a9fc7a6a52bba0edf7523313379b4f2d4
4
+ data.tar.gz: fbbacb701ad2bd5d090d85ba3322cd82a5c02f5d7d0d3ab729ee2b2b0d0882c1
5
5
  SHA512:
6
- metadata.gz: 012211e75c7de44e0493c931ce3d6d4cd8f1351da52ae4bd77da3aca95e39294d3c4c9e2c5cf2d5ebd877dcb1dbd2e45bc9f9b8ce2199ffdf3d71c3f6de517a4
7
- data.tar.gz: 380d04dddd44c608e80eebc23f601f23acaacc08fe291e6a711749e65b00d7aef664a8d30a0a0537bfec82c4e02620da3dfdbcf259bd1bd6aecc2bebaaf821d2
6
+ metadata.gz: 0f9752ef0ad2e59d72fa870a6493a8d831716c0422bb4d6d08a0afc3cadc08ee9b4b80eb97af5192ab1094ad7884533b3c7c75cc76d1a47fe7c0b90f991538d0
7
+ data.tar.gz: 47e6a0373338ad0d32b8286d2b422da55a67d5728655fca51a028a17928ee301b981d17cb396e06d23f8911ac37663b471360f8ecce97e55b7f8595e4e8096c1
data/README.md CHANGED
@@ -39,6 +39,19 @@ Or install it yourself as:
39
39
  puts bucket_url
40
40
  ```
41
41
 
42
+ ##### 字段验证规则
43
+
44
+ ```
45
+ CrawlabRubySdk.save_item({name: "haha",age:12},table_name)
46
+ CrawlabRubySdk.save_items([{name: "haha",age:12},{name:"456",age:34}],table_name)
47
+ table_name 可以是这几个["thinktank_expert_reports", "thinktank_informations", "thinktank_experts", "thinktank_reports"] 中的一个,也可以不传递
48
+
49
+ 1、如果当前采集的结果 存储的数据表是上面几个中的一个,无论是否传递,都会按照对应的表结构的 字段规则验证字段
50
+ 2、如果当前采集的结果,存储的数据表不属于上面几个中的一个,则按照传递的table_name,对应的字段规则验证字段
51
+ 3、如果没有传递table_name,并且采集的结果存储临时表,则根据推送的结果的字段匹配 上面四个表的字段,匹配上了就按照匹配上的表结构的字段规则验证,没有匹配上则不验证
52
+ ```
53
+
54
+
42
55
  ## Development
43
56
 
44
57
  After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -1,3 +1,3 @@
1
1
  module CrawlabRubySdk
2
- VERSION = "0.1.7"
2
+ VERSION = "0.1.8"
3
3
  end
@@ -36,6 +36,7 @@ module CrawlabRubySdk
36
36
  auth = "Crawlab2021!"
37
37
  end
38
38
 
39
+ table_name = get_table_name(table_name)
39
40
  if !Verify.IsVerified?([item],table_name)
40
41
  return
41
42
  end
@@ -57,7 +58,9 @@ module CrawlabRubySdk
57
58
  if auth==nil || auth == ""
58
59
  auth = "Crawlab2021!"
59
60
  end
60
- if !Verify.IsVerified?([item],table_name)
61
+
62
+ table_name = get_table_name(table_name)
63
+ if !Verify.IsVerified?(items,table_name)
61
64
  return
62
65
  end
63
66
 
@@ -99,10 +102,11 @@ module CrawlabRubySdk
99
102
 
100
103
  data = {task_id: task_id,data:records}.to_json.b
101
104
 
105
+ # puts "=====#{{task_id: task_id,data:records}.to_json}==="
106
+
102
107
  msg = Grpc::StreamMessage.new(code:3,data:data)
103
- puts data
104
108
 
105
- # sub_client.Send([msg])
109
+ sub_client.Send([msg])
106
110
  end
107
111
 
108
112
  def self.get_task_id
@@ -120,4 +124,14 @@ module CrawlabRubySdk
120
124
  def self.save_file_stream_to_oss(oss_path,stream)
121
125
  OssServerClient.new.send_stream(oss_path,stream)
122
126
  end
127
+
128
+ def self.get_table_name(table_name="")
129
+ table_names = Verify.table_names
130
+ if ENV["TABLE_NAME"] != nil && ENV["TABLE_NAME"] != "" && table_names.include?(ENV["TABLE_NAME"])
131
+ table_name = ENV["TABLE_NAME"]
132
+ return table_name
133
+ end
134
+
135
+ return table_name
136
+ end
123
137
  end
data/lib/models/base.rb CHANGED
@@ -85,6 +85,7 @@ class BaseModel
85
85
  rescue StandardError => e
86
86
  return false
87
87
  end
88
+ return true
88
89
  end
89
90
 
90
91
  def verify_string(v)
@@ -161,7 +162,7 @@ class BaseModel
161
162
  return false
162
163
  end
163
164
  fields_arr = fn.split(":")
164
- if length_arr.size < 2
165
+ if fields_arr.size < 2
165
166
  return false
166
167
  end
167
168
  fields_str = fields_arr[1]
@@ -169,15 +170,17 @@ class BaseModel
169
170
  fields = fields_str.split(",")
170
171
 
171
172
  begin
172
- datas = json.parse(v)
173
+ datas = JSON.parse(v)
173
174
  datas.each do |data|
174
175
  fields.each do |field|
175
176
  if data[field] == nil
177
+ puts "ERROR #{field} not Exist!"
176
178
  return false
177
179
  end
178
180
  end
179
181
  end
180
182
  rescue StandardError => e
183
+ puts e
181
184
  return false
182
185
  end
183
186
 
@@ -41,7 +41,40 @@ class ThinktankExpert < BaseModel
41
41
 
42
42
 
43
43
  def initialize(options={})
44
-
44
+ options = JSON.parse(options.to_json)
45
+ @id = options["id"]
46
+ @name = options["name"]
47
+ @title = options["title"]
48
+ @content = options["content"]
49
+ @location = options["location"]
50
+ @area_of_expertise = options["area_of_expertise"]
51
+ @profile_images = options["profile_images"]
52
+ @phone = options["phone"]
53
+ @email = options["email"]
54
+ @link = options["link"]
55
+ @audios = options["audios"]
56
+ @videos = options["videos"]
57
+ @education = options["education"]
58
+ @related_topics = options["related_topics"]
59
+ @site_name = options["site_name"]
60
+ @site_name_cn = options["site_name_cn"]
61
+ @domain = options["domain"]
62
+ @created_at = options["created_at"]
63
+ @updated_at = options["updated_at"]
64
+ @source = options["source"]
65
+ @oss_profile_images = options["oss_profile_images"]
66
+ @facebook = options["facebook"]
67
+ @twitter = options["twitter"]
68
+ @linkedin = options["linkedin"]
69
+ @instagram = options["instagram"]
70
+ @wikidata = options["wikidata"]
71
+ @person_type = options["person_type"]
72
+ @files = options["files"]
73
+ @oss_files = options["oss_files"]
74
+ @associated_program = options["associated_program"]
75
+ @lang = options["lang"]
76
+ @website = options["website"]
77
+ @nationalities = options["nationalities"]
45
78
  end
46
79
 
47
80
  def as_json
@@ -42,7 +42,43 @@ class ThinktankExpertReport < BaseModel
42
42
  register
43
43
 
44
44
  def initialize(options={})
45
-
45
+ options = JSON.parse(options.to_json)
46
+ @id = options["id"]
47
+ @title = options["title"]
48
+ @site_name = options["site_name"]
49
+ @site_name_cn = options["site_name_cn"]
50
+ @abstract = options["abstract"]
51
+ @content = options["content"]
52
+ @author_names = options["author_names"]
53
+ @source = options["source"]
54
+ @files = options["files"]
55
+ @images = options["images"]
56
+ @videos = options["videos"]
57
+ @audios = options["audios"]
58
+ @links = options["links"]
59
+ @domain = options["domain"]
60
+ @keywords= options["keywords"]
61
+ @html_content = options["html_content"]
62
+ @lang = options["lang"]
63
+ @country_cn = options["country_cn"]
64
+ @country_code = options["country_code"]
65
+ @created_at = options["created_at"]
66
+ @updated_at = options["updated_at"]
67
+ @created_time = options["created_time"]
68
+ @oss_files = options["oss_files"]
69
+ @oss_images = options["oss_images"]
70
+ @customer_category = options["customer_category"]
71
+ @category = options["category"]
72
+ @topics = options["topics"]
73
+ @tags = options["tags"]
74
+ @views = options["views"]
75
+ @comments = options["comments"]
76
+ @reference = options["reference"]
77
+ @mention_country = options["mention_country"]
78
+ @authors = options["authors"]
79
+ @sub_title = options["sub_title"]
80
+ @timezone = options["timezone"]
81
+ @timezone_location = options["timezone_location"]
46
82
  end
47
83
 
48
84
  def as_json
@@ -44,7 +44,44 @@ class ThinktankInformation < BaseModel
44
44
 
45
45
 
46
46
  def initialize(options={})
47
-
47
+ options = JSON.parse(options.to_json)
48
+ @id = options["id"]
49
+ @title = options["title"]
50
+ @site_name = options["site_name"]
51
+ @site_name_cn = options["site_name_cn"]
52
+ @abstract = options["abstract"]
53
+ @content = options["content"]
54
+ @author_names = options["author_names"]
55
+ @state_info = options["state_info"]
56
+ @source = options["source"]
57
+ @files = options["files"]
58
+ @images = options["images"]
59
+ @videos = options["videos"]
60
+ @audios = options["audios"]
61
+ @links = options["links"]
62
+ @domain = options["domain"]
63
+ @keywords = options["keywords"]
64
+ @html_content = options["html_content"]
65
+ @lang = options["lang"]
66
+ @country_cn = options["country_cn"]
67
+ @country_code = options["country_code"]
68
+ @created_at = options["created_at"]
69
+ @updated_at = options["updated_at"]
70
+ @created_time = options["created_time"]
71
+ @oss_files = options["oss_files"]
72
+ @oss_images = options["oss_images"]
73
+ @customer_category = options["customer_category"]
74
+ @category = options["category"]
75
+ @topics = options["topics"]
76
+ @tags = options["tags"]
77
+ @views = options["views"]
78
+ @comments = options["comments"]
79
+ @reference = options["reference"]
80
+ @mention_country = options["mention_country"]
81
+ @authors = options["authors"]
82
+ @sub_title = options["sub_title"]
83
+ @timezone = options["timezone"]
84
+ @timezone_location = options["timezone_location"]
48
85
  end
49
86
 
50
87
  def as_json
@@ -43,7 +43,43 @@ class ThinktankReport < BaseModel
43
43
 
44
44
 
45
45
  def initialize(options = {})
46
-
46
+ options = JSON.parse(options.to_json)
47
+ @id = options["id"]
48
+ @title = options["title"]
49
+ @site_name = options["site_name"]
50
+ @site_name_cn = options["site_name_cn"]
51
+ @abstract = options["abstract"]
52
+ @content = options["content"]
53
+ @author_names = options["author_names"]
54
+ @source = options["source"]
55
+ @files = options["files"]
56
+ @images = options["images"]
57
+ @videos = options["videos"]
58
+ @audios = options["audios"]
59
+ @links = options["links"]
60
+ @domain = options["domain"]
61
+ @keywords = options["keywords"]
62
+ @html_content = options["html_content"]
63
+ @lang = options["lang"]
64
+ @country_cn = options["country_cn"]
65
+ @country_code = options["country_code"]
66
+ @created_at = options["created_at"]
67
+ @updated_at = options["updated_at"]
68
+ @created_time = options["created_time"]
69
+ @oss_files = options["oss_files"]
70
+ @oss_images = options["oss_images"]
71
+ @customer_category = options["customer_category"]
72
+ @category = options["category"]
73
+ @topics = options["topics"]
74
+ @tags = options["tags"]
75
+ @views = options["views"]
76
+ @comments = options["comments"]
77
+ @reference = options["reference"]
78
+ @mention_country = options["mention_country"]
79
+ @authors = options["authors"]
80
+ @sub_title = options["sub_title"]
81
+ @timezone = options["timezone"]
82
+ @timezone_location = options["timezone_location"]
47
83
  end
48
84
 
49
85
  def to_json
data/lib/verify/verify.rb CHANGED
@@ -3,6 +3,10 @@ class Verify
3
3
 
4
4
  if table_name == "" && items.size > 0
5
5
  table_name = switch_table(items[0])
6
+ elsif table_name != "" && items.size > 0
7
+ if !match_fields?(items[0],table_name)
8
+ return
9
+ end
6
10
  end
7
11
 
8
12
  if table_name == ""
@@ -50,4 +54,40 @@ class Verify
50
54
  return table_name
51
55
  end
52
56
 
57
+ def self.match_fields?(item={},table_name)
58
+ if table_name == nil || table_name == ""
59
+ return false
60
+ end
61
+ item_keys = JSON.parse(item.to_json).keys
62
+ v = $map_models[table_name]
63
+
64
+ if v == nil
65
+ puts "ERROR #{table_name} not Exist!"
66
+ return false
67
+ end
68
+
69
+ klass = Object.const_get v
70
+ klass_keys = klass.fields
71
+
72
+ # puts "----#{klass_keys}--"
73
+ sub_keys1 = item_keys - klass_keys
74
+ if sub_keys1.size > 0
75
+ puts "ERROR #{sub_keys1} do not belong #{table_name}"
76
+ return false
77
+ end
78
+
79
+ sub_keys2 = klass_keys - item_keys
80
+ if sub_keys2.size > 0
81
+ puts "ERROR #{sub_keys2} do not Exist!"
82
+ return false
83
+ end
84
+
85
+ return true
86
+
87
+ end
88
+
89
+ def self.table_names
90
+ return $map_models.keys
91
+ end
92
+
53
93
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawlab_ruby_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - min
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-10 00:00:00.000000000 Z
11
+ date: 2023-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: grpc