pod_ident 1.0.7 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4a06172b4a5d265b29b1f6124d3ff0fcc1072110d9c6b28507b9c171dbfd422e
4
- data.tar.gz: 8aa51342b77eb61d70c8e606e8d9f717a56a7d890a1fb5b7f26f6f147dad3a86
3
+ metadata.gz: 8a65623d1fa8d1ed68eee9872ec22ba7fac7aa9f0295f00c229319b08a09528e
4
+ data.tar.gz: 1e0dd3b229e27445de6786e785fd657c719c1aa97bf77755baf34d5f3f539a7f
5
5
  SHA512:
6
- metadata.gz: 6c42085435f4c900f3e8d4a42f159820d5f669fcfe596b01c3b2e6ba1956a024b72dbcf71b496e46b7f1d17feb7e8526dc113ab10ee1503a7253a1a7ae3201a4
7
- data.tar.gz: cfa22d1cd0e6286ed1d3e909ba495f30ff23a76ed8550e234c41c946511fe7af0e38623dd435006bd73265dce9f0acb655809aa5b0446fe0100cd5a157a14e8f
6
+ metadata.gz: bb5d27b7af088f9ef7bca27ed898dacf54c823fb35329b18f6da7bcd018ee5279bfa939a95fa1f715674e50544d0ccd3b01ecae546e42f12e6f9ec0bf371f409
7
+ data.tar.gz: ecea67a6709cc4f96e4f07429eaaec189ae12a5213cdab384e9d7a0b5677837f3876564ad2e2ffb40c7dc631d912f3bc4069e29ab2658d92b500f592755196df
data/.rubocop.yml CHANGED
@@ -16,5 +16,5 @@ Metrics/BlockLength:
16
16
  - 'Rakefile'
17
17
  - '**/*.rake'
18
18
  - 'spec/**/*.rb'
19
- Lint/UnneededCopDisableDirective:
19
+ Lint/RedundantCopDisableDirective:
20
20
  Enabled: false
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.6.6
1
+ 3.2.0
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pod_ident (1.0.6)
4
+ pod_ident (1.1.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -33,7 +33,7 @@ PLATFORMS
33
33
  ruby
34
34
 
35
35
  DEPENDENCIES
36
- bundler (~> 1.16)
36
+ bundler (~> 2.3.20)
37
37
  pod_ident!
38
38
  pry
39
39
  rake (~> 13.0)
@@ -41,4 +41,4 @@ DEPENDENCIES
41
41
  rspec_junit_formatter
42
42
 
43
43
  BUNDLED WITH
44
- 1.17.3
44
+ 2.3.20
data/README.md CHANGED
@@ -57,6 +57,21 @@ puts result.platform
57
57
  => 'iPod'
58
58
  ```
59
59
 
60
+ ### Bot detection
61
+
62
+
63
+ ```ruby
64
+ require 'pod_ident'
65
+
66
+ user_agent = 'Mozilla/5.0 (compatible; archive.org_bot http://archive.org/details/archive.org_bot)'
67
+ result = PodIdent::Detector.detect(user_agent)
68
+
69
+ puts result.app
70
+ => 'Archive.org'
71
+ puts result.bot?
72
+ => true
73
+ ```
74
+
60
75
  ## Development
61
76
 
62
77
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -67,6 +82,12 @@ The detection rules are found as a yaml file under `lib/detection_rules.yml`.
67
82
 
68
83
  Please, only edit this file, and afterwards run `bin/parse-rules` in order to generate both the detection rules as a `rb` file for production usage, and the complete rules with their corresponding test cases for testing.
69
84
 
85
+ ### Update bot detection rules
86
+
87
+ For the bot detection rules we use [user-agents-v2](https://github.com/opawg/user-agents-v2/blob/master/src/bots.json). To import that project's JSON and transform it into a YAML file readable by pod_ident, run the following task: `rake update_bot_rules`.
88
+
89
+ Once the YAML file has been created, run `bin/parse-rules` to generate all the `rb` detection rules files for bots.
90
+
70
91
  ## Contributing
71
92
 
72
93
  Bug reports and pull requests are welcome on GitHub at https://github.com/podigee/pod_ident.rb.
data/Rakefile CHANGED
@@ -1,6 +1,61 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+ require 'json'
4
+ require 'yaml'
5
+ require 'net/http'
6
+ require 'uri'
3
7
 
4
8
  RSpec::Core::RakeTask.new(:spec)
5
9
 
6
- task :default => :spec
10
+ task default: :spec
11
+
12
+ task :update_bot_rules do
13
+ BOT_JSON_SOURCE = 'https://raw.githubusercontent.com/opawg/user-agents-v2/master/src/bots.json'
14
+ BOT_JSON = File.expand_path('bots.json', __dir__)
15
+ BOT_YML = File.expand_path('lib/detection_rules_bots.yml', __dir__)
16
+
17
+ def update_bot_json
18
+ clean_json
19
+ uri = URI.parse(BOT_JSON_SOURCE)
20
+ response = Net::HTTP.get_response(uri)
21
+ File.open(BOT_JSON, 'w+') { |f| f.write(response.body) }
22
+ end
23
+
24
+ def clean_json
25
+ File.delete(BOT_JSON) if File.exist?(BOT_JSON)
26
+ end
27
+
28
+ def json_content
29
+ json = File.open(BOT_JSON).read
30
+ content = JSON.parse(json)
31
+ end
32
+
33
+ def generate_entry_yml(entry)
34
+ { 'app' => entry['name'],
35
+ 'match' => { 'regex' => (entry['pattern']) },
36
+ 'platform' => 'bot' }.merge(
37
+ entry_test_value(entry['examples'])
38
+ )
39
+ end
40
+
41
+ def entry_test_value(entry_examples)
42
+ return {} if entry_examples.nil?
43
+
44
+ examples = entry_examples.map do |example|
45
+ { 'userAgent' => example }
46
+ end
47
+
48
+ { 'test' => { 'userAgents' => examples } }
49
+ end
50
+
51
+ def write_yml_file(content)
52
+ File.open(BOT_YML, 'w+') { |f| f.write(content.to_yaml) }
53
+ clean_json
54
+ end
55
+
56
+ update_bot_json
57
+ all_entries = json_content['entries'].map do |entry|
58
+ generate_entry_yml(entry)
59
+ end
60
+ write_yml_file(all_entries)
61
+ end
@@ -53,16 +53,20 @@
53
53
  - app: Apple Podcasts
54
54
  match:
55
55
  includes: watchOS
56
+ excluding:
57
+ regex: '^atc/|\\(null\\) watchOS/'
56
58
  platform:
57
59
  text: watchOS
58
60
  test:
59
61
  userAgents:
60
- - userAgent: '(null)/(null) watchOS/5.0 model/Watch2,3 hwp/t8002 build/16R364 (6; dt:133)'
62
+ - userAgent: 'watchOS/5.0 model/Watch2,3 hwp/t8002 build/16R364 (6; dt:133)'
61
63
  platform: watchOS
62
64
 
63
65
  - app: Spotify
64
66
  match:
65
67
  startsWith: Spotify
68
+ excluding:
69
+ regex: '^Spotify/1\\.0$'
66
70
  platform:
67
71
  regexes:
68
72
  - '(Android|iOS)'
@@ -176,6 +180,8 @@
176
180
  - app: Overcast
177
181
  match:
178
182
  startsWith: Overcast
183
+ excluding:
184
+ regex: '^Overcast/1\\.0 Podcast Sync'
179
185
  platform:
180
186
  text: 'iOS'
181
187
  test:
@@ -429,6 +435,8 @@
429
435
  - app: Podbean
430
436
  match:
431
437
  startsWith: Podbean
438
+ excluding:
439
+ regex: "^Podbean Importer"
432
440
  platform:
433
441
  regex: '(iOS|Android)'
434
442
  test:
@@ -828,6 +836,8 @@
828
836
  - app: Twitter in-app browser
829
837
  match:
830
838
  includes: Twitter
839
+ excluding:
840
+ regex: "^TelegramBot "
831
841
  platform:
832
842
  regex: '(iPhone|iPad|Darwin|Android)'
833
843
  replacements:
@@ -946,6 +956,8 @@
946
956
  - app: Unknown client
947
957
  match:
948
958
  startsWith: '(null)'
959
+ excluding:
960
+ regex: '^atc/|\\(null\\) watchOS/'
949
961
  platform:
950
962
  regex: '(iPhone|iPad|iPod touch)'
951
963
  test:
@@ -966,6 +978,8 @@
966
978
  - app: Flipboard
967
979
  match:
968
980
  includes: Flipboard
981
+ excluding:
982
+ regex: FlipboardProxy/
969
983
  platform:
970
984
  regexes:
971
985
  - '(Android)'
@@ -979,8 +993,6 @@
979
993
  platform: Android
980
994
  - userAgent: Mozilla/5.0 (iPad; CPU OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Mobile/14E304 Flipboard/4.2.20
981
995
  platform: iPad
982
- - userAgent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:49.0) Gecko/20100101 Firefox/49.0 (FlipboardProxy/1.2; +http://flipboard.com/browserproxy)
983
- platform: Mac
984
996
 
985
997
  - app: iVoox
986
998
  match:
@@ -1071,3 +1083,13 @@
1071
1083
  platform: Android
1072
1084
  - userAgent: "ARD Audiothek iOS Version 2.3.4, Build 41"
1073
1085
  platform: iOS
1086
+
1087
+ - app: Samsung Podcast
1088
+ match:
1089
+ startsWith: "sp-agent"
1090
+ platform:
1091
+ text: "Android"
1092
+ test:
1093
+ userAgents:
1094
+ - userAgent: "sp-agent"
1095
+ platform: Android