pod_ident 1.0.8 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/.ruby-version +1 -1
- data/Gemfile.lock +3 -3
- data/README.md +21 -0
- data/Rakefile +58 -3
- data/lib/detection_rules.yml +15 -3
- data/lib/detection_rules_bots.yml +832 -0
- data/lib/pod_ident/detection_result.rb +4 -0
- data/lib/pod_ident/detection_rules.rb +1 -1
- data/lib/pod_ident/detection_rules_bots.rb +3 -0
- data/lib/pod_ident/rule_parser.rb +32 -1
- data/lib/pod_ident/version.rb +1 -1
- data/lib/pod_ident.rb +20 -2
- data/pod_ident.gemspec +1 -1
- metadata +7 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a65623d1fa8d1ed68eee9872ec22ba7fac7aa9f0295f00c229319b08a09528e
|
4
|
+
data.tar.gz: 1e0dd3b229e27445de6786e785fd657c719c1aa97bf77755baf34d5f3f539a7f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bb5d27b7af088f9ef7bca27ed898dacf54c823fb35329b18f6da7bcd018ee5279bfa939a95fa1f715674e50544d0ccd3b01ecae546e42f12e6f9ec0bf371f409
|
7
|
+
data.tar.gz: ecea67a6709cc4f96e4f07429eaaec189ae12a5213cdab384e9d7a0b5677837f3876564ad2e2ffb40c7dc631d912f3bc4069e29ab2658d92b500f592755196df
|
data/.rubocop.yml
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
3.2.0
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
pod_ident (1.0
|
4
|
+
pod_ident (1.1.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
@@ -33,7 +33,7 @@ PLATFORMS
|
|
33
33
|
ruby
|
34
34
|
|
35
35
|
DEPENDENCIES
|
36
|
-
bundler (~>
|
36
|
+
bundler (~> 2.3.20)
|
37
37
|
pod_ident!
|
38
38
|
pry
|
39
39
|
rake (~> 13.0)
|
@@ -41,4 +41,4 @@ DEPENDENCIES
|
|
41
41
|
rspec_junit_formatter
|
42
42
|
|
43
43
|
BUNDLED WITH
|
44
|
-
|
44
|
+
2.3.20
|
data/README.md
CHANGED
@@ -57,6 +57,21 @@ puts result.platform
|
|
57
57
|
=> 'iPod'
|
58
58
|
```
|
59
59
|
|
60
|
+
### Bot detection
|
61
|
+
|
62
|
+
|
63
|
+
```ruby
|
64
|
+
require 'pod_ident'
|
65
|
+
|
66
|
+
user_agent = 'Mozilla/5.0 (compatible; archive.org_bot http://archive.org/details/archive.org_bot)'
|
67
|
+
result = PodIdent::Detector.detect(user_agent)
|
68
|
+
|
69
|
+
puts result.app
|
70
|
+
=> 'Archive.org'
|
71
|
+
puts result.bot?
|
72
|
+
=> true
|
73
|
+
```
|
74
|
+
|
60
75
|
## Development
|
61
76
|
|
62
77
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -67,6 +82,12 @@ The detection rules are found as a yaml file under `lib/detection_rules.yml`.
|
|
67
82
|
|
68
83
|
Please, only edit this file, and afterwards run `bin/parse-rules` in order to generate both the detection rules as a `rb` file for production usage, and the complete rules with their corresponding test cases for testing.
|
69
84
|
|
85
|
+
### Update bot detection rules
|
86
|
+
|
87
|
+
For the bot detection rules we use [user-agents-v2](https://github.com/opawg/user-agents-v2/blob/master/src/bots.json). To import that project's JSON and transform it into a YAML file readable by pod_ident, run the following task: `rake update_bot_rules`.
|
88
|
+
|
89
|
+
Once the yml file has been created, run `bin/parse-rules` to generate the rb detection rules files for bots.
|
90
|
+
|
70
91
|
## Contributing
|
71
92
|
|
72
93
|
Bug reports and pull requests are welcome on GitHub at https://github.com/podigee/pod_ident.rb.
|
data/Rakefile
CHANGED
@@ -1,6 +1,61 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
require 'json'
|
4
|
+
require 'yaml'
|
5
|
+
require 'net/http'
|
6
|
+
require 'uri'
|
3
7
|
|
4
8
|
RSpec::Core::RakeTask.new(:spec)
|
5
9
|
|
6
|
-
task :
|
10
|
+
task default: :spec
|
11
|
+
|
12
|
+
task :update_bot_rules do
|
13
|
+
BOT_JSON_SOURCE = 'https://raw.githubusercontent.com/opawg/user-agents-v2/master/src/bots.json'
|
14
|
+
BOT_JSON = File.expand_path('bots.json', __dir__)
|
15
|
+
BOT_YML = File.expand_path('lib/detection_rules_bots.yml', __dir__)
|
16
|
+
|
17
|
+
def update_bot_json
|
18
|
+
clean_json
|
19
|
+
uri = URI.parse(BOT_JSON_SOURCE)
|
20
|
+
response = Net::HTTP.get_response(uri)
|
21
|
+
File.open(BOT_JSON, 'w+') { |f| f.write(response.body) }
|
22
|
+
end
|
23
|
+
|
24
|
+
def clean_json
|
25
|
+
File.delete(BOT_JSON) if File.exist?(BOT_JSON)
|
26
|
+
end
|
27
|
+
|
28
|
+
def json_content
|
29
|
+
json = File.open(BOT_JSON).read
|
30
|
+
content = JSON.parse(json)
|
31
|
+
end
|
32
|
+
|
33
|
+
def generate_entry_yml(entry)
|
34
|
+
{ 'app' => entry['name'],
|
35
|
+
'match' => { 'regex' => (entry['pattern']) },
|
36
|
+
'platform' => 'bot' }.merge(
|
37
|
+
entry_test_value(entry['examples'])
|
38
|
+
)
|
39
|
+
end
|
40
|
+
|
41
|
+
def entry_test_value(entry_examples)
|
42
|
+
return {} if entry_examples.nil?
|
43
|
+
|
44
|
+
examples = entry_examples.map do |example|
|
45
|
+
{ 'userAgent' => example }
|
46
|
+
end
|
47
|
+
|
48
|
+
{ 'test' => { 'userAgents' => examples } }
|
49
|
+
end
|
50
|
+
|
51
|
+
def write_yml_file(content)
|
52
|
+
File.open(BOT_YML, 'w+') { |f| f.write(content.to_yaml) }
|
53
|
+
clean_json
|
54
|
+
end
|
55
|
+
|
56
|
+
update_bot_json
|
57
|
+
all_entries = json_content['entries'].map do |entry|
|
58
|
+
generate_entry_yml(entry)
|
59
|
+
end
|
60
|
+
write_yml_file(all_entries)
|
61
|
+
end
|
data/lib/detection_rules.yml
CHANGED
@@ -53,16 +53,20 @@
|
|
53
53
|
- app: Apple Podcasts
|
54
54
|
match:
|
55
55
|
includes: watchOS
|
56
|
+
excluding:
|
57
|
+
regex: '^atc/|\\(null\\) watchOS/'
|
56
58
|
platform:
|
57
59
|
text: watchOS
|
58
60
|
test:
|
59
61
|
userAgents:
|
60
|
-
- userAgent: '
|
62
|
+
- userAgent: 'watchOS/5.0 model/Watch2,3 hwp/t8002 build/16R364 (6; dt:133)'
|
61
63
|
platform: watchOS
|
62
64
|
|
63
65
|
- app: Spotify
|
64
66
|
match:
|
65
67
|
startsWith: Spotify
|
68
|
+
excluding:
|
69
|
+
regex: '^Spotify/1\\.0$'
|
66
70
|
platform:
|
67
71
|
regexes:
|
68
72
|
- '(Android|iOS)'
|
@@ -176,6 +180,8 @@
|
|
176
180
|
- app: Overcast
|
177
181
|
match:
|
178
182
|
startsWith: Overcast
|
183
|
+
excluding:
|
184
|
+
regex: '^Overcast/1\\.0 Podcast Sync'
|
179
185
|
platform:
|
180
186
|
text: 'iOS'
|
181
187
|
test:
|
@@ -429,6 +435,8 @@
|
|
429
435
|
- app: Podbean
|
430
436
|
match:
|
431
437
|
startsWith: Podbean
|
438
|
+
excluding:
|
439
|
+
regex: "^Podbean Importer"
|
432
440
|
platform:
|
433
441
|
regex: '(iOS|Android)'
|
434
442
|
test:
|
@@ -828,6 +836,8 @@
|
|
828
836
|
- app: Twitter in-app browser
|
829
837
|
match:
|
830
838
|
includes: Twitter
|
839
|
+
excluding:
|
840
|
+
regex: "^TelegramBot "
|
831
841
|
platform:
|
832
842
|
regex: '(iPhone|iPad|Darwin|Android)'
|
833
843
|
replacements:
|
@@ -946,6 +956,8 @@
|
|
946
956
|
- app: Unknown client
|
947
957
|
match:
|
948
958
|
startsWith: '(null)'
|
959
|
+
excluding:
|
960
|
+
regex: '^atc/|\\(null\\) watchOS/'
|
949
961
|
platform:
|
950
962
|
regex: '(iPhone|iPad|iPod touch)'
|
951
963
|
test:
|
@@ -966,6 +978,8 @@
|
|
966
978
|
- app: Flipboard
|
967
979
|
match:
|
968
980
|
includes: Flipboard
|
981
|
+
excluding:
|
982
|
+
regex: FlipboardProxy/
|
969
983
|
platform:
|
970
984
|
regexes:
|
971
985
|
- '(Android)'
|
@@ -979,8 +993,6 @@
|
|
979
993
|
platform: Android
|
980
994
|
- userAgent: Mozilla/5.0 (iPad; CPU OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Mobile/14E304 Flipboard/4.2.20
|
981
995
|
platform: iPad
|
982
|
-
- userAgent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:49.0) Gecko/20100101 Firefox/49.0 (FlipboardProxy/1.2; +http://flipboard.com/browserproxy)
|
983
|
-
platform: Mac
|
984
996
|
|
985
997
|
- app: iVoox
|
986
998
|
match:
|