udger 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/MIT-LICENSE +20 -0
- data/README.md +130 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/udger.rb +6 -0
- data/lib/udger/base_parser.rb +27 -0
- data/lib/udger/ip_parser.rb +107 -0
- data/lib/udger/object.rb +74 -0
- data/lib/udger/parser.rb +56 -0
- data/lib/udger/ua_parser.rb +221 -0
- data/lib/udger/version.rb +3 -0
- data/udger.gemspec +40 -0
- metadata +145 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 28464c9d0822abeeb3455da475fe10ea832692fc
|
4
|
+
data.tar.gz: aee465243f6064ada0d14e6616afc88133f0f19f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 653c0d6ae1fc9ed506c43567a286189e2dbfa837e05961cda06b65ac318a1a943d577c8521ba28c8ee4b850f3328d9170484c2162053d847a8ee556319875775
|
7
|
+
data.tar.gz: de56778029ecb0e13899d88e30893190e377b8f6c741afe919b0e836a5f36b2f409f70499148b576c609300602a7b5a2bb0fb7a2f350dc547772cb238536a723
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2017 TowerData
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
# Udger
|
2
|
+
|
3
|
+
## Installation
|
4
|
+
|
5
|
+
Add this line to your application's Gemfile:
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
gem 'udger'
|
9
|
+
```
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install udger
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
User agent and IP address parser for the Udger database.
|
22
|
+
|
23
|
+
|
24
|
+
require 'udger'
|
25
|
+
parser = Udger::Parser.new('path_to_udger_db', options) # Directory containing the database file, which must be named 'udgerdb_v3.dat'
|
26
|
+
|
27
|
+
|
28
|
+
### Options
|
29
|
+
|
30
|
+
- cache - default is true. Enables caching of parsed results; only user agent parsing is cached.
|
31
|
+
- lru_cache_size - default is 10,000. Maximum number of results kept in the LRU cache.
|
32
|
+
- ua_services - if you do not need all the information for a user agent, you can specify which services to run. Available services are: [:crawler, :client, :os, :device, :device_market]. Reducing the number of services improves performance.
|
33
|
+
|
34
|
+
|
35
|
+
### Parsing user agent
|
36
|
+
|
37
|
+
parser.parse_ua('Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0')
|
38
|
+
|
39
|
+
This returns a struct with the following fields. A field is nil when no data is available for it.
|
40
|
+
|
41
|
+
- ua_class
|
42
|
+
- ua_class_code
|
43
|
+
- ua
|
44
|
+
- ua_version
|
45
|
+
- ua_version_major
|
46
|
+
- ua_uptodate_current_version
|
47
|
+
- ua_family
|
48
|
+
- ua_family_code
|
49
|
+
- ua_family_homepage
|
50
|
+
- ua_family_vendor
|
51
|
+
- ua_family_vendor_code
|
52
|
+
- ua_family_vendor_homepage
|
53
|
+
- ua_family_icon
|
54
|
+
- ua_family_icon_big
|
55
|
+
- ua_family_info_url
|
56
|
+
- ua_engine
|
57
|
+
- os
|
58
|
+
- os_code
|
59
|
+
- os_homepage
|
60
|
+
- os_icon
|
61
|
+
- os_icon_big
|
62
|
+
- os_info_url
|
63
|
+
- os_family
|
64
|
+
- os_family_code
|
65
|
+
- os_family_vendor
|
66
|
+
- os_family_vendor_code
|
67
|
+
- os_family_vendor_homepage
|
68
|
+
- device_class
|
69
|
+
- device_class_code
|
70
|
+
- device_class_icon
|
71
|
+
- device_class_icon_big
|
72
|
+
- device_class_info_url
|
73
|
+
- device_marketname
|
74
|
+
- device_brand
|
75
|
+
- device_brand_code
|
76
|
+
- device_brand_homepage
|
77
|
+
- device_brand_icon
|
78
|
+
- device_brand_icon_big
|
79
|
+
- device_brand_info_url
|
80
|
+
- crawler_last_seen
|
81
|
+
- crawler_category
|
82
|
+
- crawler_category_code
|
83
|
+
- crawler_respect_robotstxt
|
84
|
+
|
85
|
+
### Parsing ip
|
86
|
+
|
87
|
+
parser.parse_ip('108.61.199.93')
|
88
|
+
parser.parse_ip('2a02:598:111::9')
|
89
|
+
|
90
|
+
|
91
|
+
This returns a struct with the following fields. A field is nil when no data is available for it.
|
92
|
+
|
93
|
+
- ip_ver
|
94
|
+
- ip_classification
|
95
|
+
- ip_classification_code
|
96
|
+
- ip_hostname
|
97
|
+
- ip_last_seen
|
98
|
+
- ip_country
|
99
|
+
- ip_country_code
|
100
|
+
- ip_city
|
101
|
+
- crawler_name
|
102
|
+
- crawler_ver
|
103
|
+
- crawler_ver_major
|
104
|
+
- crawler_family
|
105
|
+
- crawler_family_code
|
106
|
+
- crawler_family_homepage
|
107
|
+
- crawler_family_vendor
|
108
|
+
- crawler_family_vendor_code
|
109
|
+
- crawler_family_vendor_homepage
|
110
|
+
- crawler_family_icon
|
111
|
+
- crawler_family_info_url
|
112
|
+
- crawler_last_seen
|
113
|
+
- crawler_category
|
114
|
+
- crawler_category_code
|
115
|
+
- crawler_respect_robotstxt
|
116
|
+
- datacenter_name
|
117
|
+
- datacenter_name_code
|
118
|
+
- datacenter_homepage
|
119
|
+
|
120
|
+
|
121
|
+
## Development
|
122
|
+
|
123
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
124
|
+
|
125
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
126
|
+
|
127
|
+
## Contributing
|
128
|
+
|
129
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/TowerData/udger.
|
130
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "udger"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/udger.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
module Udger
  # Behaviour shared by the database-backed parsers: holds the database
  # handle and provides helpers for matching stored "/pattern/flags"
  # expressions against a string.
  class BaseParser
    attr_accessor :db

    # @param db [#execute] handle used to run queries
    def initialize(db)
      @db = db
    end

    protected

    # Builds a Regexp from a delimited "/pattern/flags" string.
    #
    # The pattern is everything between the first and the last "/". Any
    # trailing flags are ignored: the result is always case-insensitive.
    #
    # @param string [String] delimited expression, e.g. "/firefox\\/(\\d+)/si"
    # @return [Regexp]
    # @raise [ArgumentError] when the string does not contain two delimiters
    def regexp(string)
      opening = string.index('/')
      closing = string.rindex('/')
      # Without two distinct delimiters there is no pattern to extract; an
      # empty pattern would match every input, so fail loudly instead.
      if opening.nil? || opening == closing
        raise ArgumentError, "malformed regstring: #{string.inspect}"
      end

      Regexp.new(string[(opening + 1)...closing], Regexp::IGNORECASE)
    end

    # Runs +query+ and yields the first row whose 'regstring' column matches
    # +ua_string+ (which must be provided by the including parser).
    #
    # @param query [String] SQL returning rows with a 'regstring' column
    # @param _cache [Boolean] unused; kept for interface compatibility
    # @yieldparam match [Array] result of String#scan for the matching row
    # @yieldparam row [Hash] the matching database row
    def regexp_parse(query, _cache = true)
      db.execute(query) do |row|
        match = ua_string.scan(regexp(row['regstring']))
        next if match.empty?

        yield match, row
        # Rows are tried in query order; only the first match is reported.
        break
      end
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'ipaddr'
|
2
|
+
module Udger
  # Looks up one IP address in the database and records the findings in an
  # IpAddress struct exposed through #object.
  class IpParser < BaseParser
    attr_accessor :db, :object, :ip

    # IpAddress attribute => column of the IP lookup query.
    IP_COLUMNS = {
      ip_classification: 'ip_classification',
      ip_classification_code: 'ip_classification_code',
      ip_last_seen: 'ip_last_seen',
      ip_hostname: 'ip_hostname',
      ip_country: 'ip_country',
      ip_country_code: 'ip_country_code',
      ip_city: 'ip_city',
      crawler_name: 'name',
      crawler_ver: 'ver',
      crawler_ver_major: 'ver_major',
      crawler_family: 'family',
      crawler_family_code: 'family_code',
      crawler_family_homepage: 'family_homepage',
      crawler_family_vendor: 'vendor',
      crawler_family_vendor_code: 'vendor_code',
      crawler_family_vendor_homepage: 'vendor_homepage',
      crawler_family_icon: 'family_icon',
      crawler_last_seen: 'last_seen',
      crawler_category: 'crawler_classification',
      crawler_category_code: 'crawler_classification_code',
      crawler_respect_robotstxt: 'respect_robotstxt'
    }.freeze

    # IpAddress attribute => column of the datacenter queries.
    DATACENTER_COLUMNS = {
      datacenter_name: 'name',
      datacenter_name_code: 'name_code',
      datacenter_homepage: 'homepage'
    }.freeze

    # @param db [#execute] database handle
    # @param ip [String, nil] textual IPv4 or IPv6 address
    def initialize(db, ip)
      super(db)
      @ip = ip
      @object = IpAddress.new
    end

    # Fills #object for the configured address. Does nothing when no
    # address was given.
    def parse
      return unless @ip

      object.ip = @ip
      parse_ip_object
    end

    # Classifies the address and then looks up its datacenter. Returns
    # without touching #object when the address cannot be parsed.
    def parse_ip_object
      ip_object = fetch_ip
      return if ip_object.nil?

      object.ip_ver = ip_object.ipv4? ? 4 : 6
      query = "SELECT udger_crawler_list.id as botid,ip_last_seen,ip_hostname,ip_country,ip_city,ip_country_code,ip_classification,ip_classification_code,
      name,ver,ver_major,last_seen,respect_robotstxt,family,family_code,family_homepage,family_icon,vendor,vendor_code,vendor_homepage,crawler_classification,crawler_classification_code
      FROM udger_ip_list
      JOIN udger_ip_class ON udger_ip_class.id=udger_ip_list.class_id
      LEFT JOIN udger_crawler_list ON udger_crawler_list.id=udger_ip_list.crawler_id
      LEFT JOIN udger_crawler_class ON udger_crawler_class.id=udger_crawler_list.class_id
      WHERE ip=? ORDER BY sequence"
      rows = db.execute(query, [ip_object.to_s])
      if rows.empty?
        object.ip_classification = 'Unrecognized'
        object.ip_classification_code = 'Unrecognized'
      else
        assign_ip_data(rows[0])
      end

      if object.ip_ver == 4
        ip4_format(ip_object)
      else
        ip6_format(ip_object)
      end
    end

    # Finds the datacenter whose IPv4 range contains the address.
    #
    # @param ip_object [IPAddr]
    def ip4_format(ip_object)
      query = 'SELECT name,name_code,homepage
      FROM udger_datacenter_range
      JOIN udger_datacenter_list ON udger_datacenter_range.datacenter_id=udger_datacenter_list.id
      WHERE iplong_from <= ? AND iplong_to >= ?'
      ip_int = ip_object.to_i
      assign_datacenter(db.execute(query, [ip_int, ip_int]))
    end

    # Finds the datacenter whose IPv6 range contains the address. Each
    # colon-separated group of the expanded address is compared against its
    # own pair of iplong_fromN / iplong_toN columns.
    #
    # @param ip_object [IPAddr]
    def ip6_format(ip_object)
      groups = ip_object.to_string.split(':').map { |group| group.to_i(16) }
      conditions = groups.each_index.map do |index|
        "iplong_from#{index} <= ? AND iplong_to#{index} >= ?"
      end
      query = "SELECT name,name_code,homepage
      FROM udger_datacenter_range6
      JOIN udger_datacenter_list ON udger_datacenter_range6.datacenter_id=udger_datacenter_list.id
      WHERE #{conditions.join(' AND ')}"
      # Every group is bound twice: once for the lower and once for the
      # upper bound of its range.
      assign_datacenter(db.execute(query, groups.flat_map { |group| [group, group] }))
    end

    # @return [IPAddr, nil] the parsed address, or nil when it is invalid
    def fetch_ip
      IPAddr.new @ip
    rescue StandardError
      # Best effort: an unparsable address simply yields no result.
      nil
    end

    private

    # Copies the columns of an IP lookup row into #object.
    def assign_ip_data(result)
      IP_COLUMNS.each do |attribute, column|
        object.public_send("#{attribute}=", result[column])
      end
      return unless result['ip_classification_code'] == 'crawler'

      object.crawler_family_info_url = "https://udger.com/resources/ua-list/bot-detail?bot=#{result['family']}#id#{result['botid']}"
    end

    # Copies the first datacenter row, if any, into #object.
    def assign_datacenter(rows)
      return if rows.empty?

      result = rows[0]
      DATACENTER_COLUMNS.each do |attribute, column|
        object.public_send("#{attribute}=", result[column])
      end
    end
  end
end
|
data/lib/udger/object.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
module Udger
  # Result record for a parsed user agent string. Fields that were not
  # populated stay nil.
  UserAgent = Struct.new(
    *%i[
      ua_string
      ua_class ua_class_code
      ua ua_version ua_version_major ua_uptodate_current_version
      ua_family ua_family_code ua_family_homepage
      ua_family_vendor ua_family_vendor_code ua_family_vendor_homepage
      ua_family_icon ua_family_icon_big ua_family_info_url
      ua_engine
      os os_code os_homepage os_icon os_icon_big os_info_url
      os_family os_family_code
      os_family_vendor os_family_vendor_code os_family_vendor_homepage
      device_class device_class_code
      device_class_icon device_class_icon_big device_class_info_url
      device_marketname
      device_brand device_brand_code device_brand_homepage
      device_brand_icon device_brand_icon_big device_brand_info_url
      crawler_last_seen crawler_category crawler_category_code
      crawler_respect_robotstxt
    ]
  )

  # Result record for a parsed IP address. Fields that were not populated
  # stay nil.
  IpAddress = Struct.new(
    *%i[
      ip ip_ver
      ip_classification ip_classification_code
      ip_hostname ip_last_seen
      ip_country ip_country_code ip_city
      crawler_name crawler_ver crawler_ver_major
      crawler_family crawler_family_code crawler_family_homepage
      crawler_family_vendor crawler_family_vendor_code
      crawler_family_vendor_homepage
      crawler_family_icon crawler_family_info_url
      crawler_last_seen crawler_category crawler_category_code
      crawler_respect_robotstxt
      datacenter_name datacenter_name_code datacenter_homepage
    ]
  )
end
|
data/lib/udger/parser.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'sqlite3'
|
2
|
+
require 'lru_redux'
|
3
|
+
module Udger
  # Entry point of the library: opens the database and parses user agent
  # strings and IP addresses against it.
  class Parser
    DB_FILENAME = 'udgerdb_v3.dat'.freeze
    # Services that can be enabled individually for user agent parsing.
    UA_SERVICES = %i[crawler client os device device_market].freeze

    attr_reader :db
    attr_reader :data_dir
    attr_accessor :cache
    attr_accessor :lru_cache_size

    # @param data_dir [String, nil] directory containing DB_FILENAME;
    #   defaults to the directory of this file
    # @param lru_cache_size [Integer] capacity of the user agent cache
    # @param cache [Boolean] whether parse_ua results are cached
    # @param ua_services [Array<Symbol>] subset of UA_SERVICES to run; an
    #   empty list enables all of them
    def initialize(data_dir = nil, lru_cache_size: 10_000, cache: true, ua_services: [])
      @data_dir = data_dir || __dir__
      @db = SQLite3::Database.new File.join(@data_dir, DB_FILENAME)
      @db.results_as_hash = true
      @cache = cache
      @lru_cache_size = lru_cache_size
      parse_ua_params(ua_services)
      # A plain Hash is only used when caching is disabled AND the size is
      # zero; every other combination builds an LRU cache of the given size.
      @lru_cache = if lru_cache_size.zero? && !cache
                     {}
                   else
                     LruRedux::Cache.new(lru_cache_size)
                   end
    end

    # Parses a user agent string, consulting the cache first when caching is
    # enabled. A cache hit returns the very same object that was stored.
    #
    # @param ua_string [String, nil]
    # @return [UserAgent]
    def parse_ua(ua_string)
      return parse_ua_no_cache(ua_string) unless @cache

      # Key on the string itself: keying on String#hash would let two
      # different strings with colliding hashes share one cached result.
      cached = @lru_cache[ua_string]
      return cached if cached

      @lru_cache[ua_string] = parse_ua_no_cache(ua_string)
    end

    # Parses an IP address. Results are never cached.
    #
    # @param ip [String, nil]
    # @return [IpAddress]
    def parse_ip(ip)
      parser = IpParser.new @db, ip
      parser.parse
      parser.object
    end

    private

    # Builds the service => enabled map handed to UaParser.
    def parse_ua_params(parse)
      enable_all = parse.empty?
      @match_ua = UA_SERVICES.each_with_object({}) do |service, enabled|
        enabled[service] = enable_all || parse.include?(service)
      end
    end

    def parse_ua_no_cache(ua_string)
      # Splat explicitly: UaParser takes keyword arguments, and a positional
      # Hash is no longer converted to keywords on Ruby 3.
      parser = UaParser.new(@db, ua_string, **@match_ua)
      parser.parse
      parser.object
    end
  end
end
|
@@ -0,0 +1,221 @@
|
|
1
|
+
module Udger
  # Parses one user agent string against the database and records the
  # findings in a UserAgent struct exposed through #object.
  class UaParser < BaseParser
    attr_accessor :db, :ua_string, :object

    INFO_URL = 'https://udger.com/resources/ua-list'.freeze

    # UserAgent attribute => column, per kind of result row.
    CRAWLER_COLUMNS = {
      ua: 'name',
      ua_version: 'ver',
      ua_version_major: 'ver_major',
      ua_family: 'family',
      ua_family_code: 'family_code',
      ua_family_homepage: 'family_homepage',
      ua_family_vendor: 'vendor',
      ua_family_vendor_code: 'vendor_code',
      ua_family_vendor_homepage: 'vendor_homepage',
      ua_family_icon: 'family_icon',
      crawler_last_seen: 'last_seen',
      crawler_category: 'crawler_classification',
      crawler_category_code: 'crawler_classification_code',
      crawler_respect_robotstxt: 'respect_robotstxt'
    }.freeze

    CLIENT_COLUMNS = {
      ua_class: 'client_classification',
      ua_class_code: 'client_classification_code',
      ua_uptodate_current_version: 'uptodate_current_version',
      ua_family: 'name',
      ua_family_code: 'name_code',
      ua_family_homepage: 'homepage',
      ua_family_vendor: 'vendor',
      ua_family_vendor_code: 'vendor_code',
      ua_family_vendor_homepage: 'vendor_homepage',
      ua_family_icon: 'icon',
      ua_family_icon_big: 'icon_big',
      ua_engine: 'engine'
    }.freeze

    OS_COLUMNS = {
      os: 'name',
      os_code: 'name_code',
      os_homepage: 'homepage',
      os_icon: 'icon',
      os_icon_big: 'icon_big',
      os_family: 'family',
      os_family_code: 'family_code',
      os_family_vendor: 'vendor',
      os_family_vendor_code: 'vendor_code',
      os_family_vendor_homepage: 'vendor_homepage'
    }.freeze

    DEVICE_CLASS_COLUMNS = {
      device_class: 'name',
      device_class_code: 'name_code',
      device_class_icon: 'icon',
      device_class_icon_big: 'icon_big'
    }.freeze

    DEVICE_BRAND_COLUMNS = {
      device_marketname: 'marketname',
      device_brand: 'brand',
      device_brand_code: 'brand_code',
      device_brand_homepage: 'brand_url',
      device_brand_icon: 'icon',
      device_brand_icon_big: 'icon_big'
    }.freeze

    # @param db [#execute] database handle
    # @param ua_string [String, nil] user agent to parse
    # @param crawler [Boolean] look the string up in the crawler list
    # @param client [Boolean] detect the client (browser etc.)
    # @param os [Boolean] detect the operating system
    # @param device [Boolean] detect the device class
    # @param device_market [Boolean] detect the device market name and brand
    def initialize(db, ua_string, crawler: true, client: true, os: true, device: true, device_market: true)
      super(db)

      @match_crawler = crawler
      @match_client = client
      @match_os = os
      @match_device = device
      @match_device_market = device_market

      @ua_string = ua_string
      @object = UserAgent.new
      # 0 / -1 mean "nothing detected yet" for the ids below.
      @os_id = 0
      @client_id = 0
      @client_class_id = -1
      @deviceclass_id = 0
    end

    # Fills #object. A string found in the crawler list is reported as a
    # crawler and no further detection runs; otherwise the enabled services
    # run in turn. Does nothing when no string was given.
    def parse
      return unless ua_string

      object.ua_string = ua_string
      crawler_data = @match_crawler ? parse_crawler : []
      if crawler_data.empty?
        parse_client if @match_client
        # The market name lookup depends on the detected OS, so the OS is
        # parsed for either service.
        if @match_os || @match_device_market
          parse_os
          parse_client_os
        end
        parse_device if @match_device
        devise_market_name if @match_device_market
      else
        format_crawler_data crawler_data[0]
      end
    end

    private

    # @return [Array<Hash>] crawler rows whose ua_string equals the input
    def parse_crawler
      query = "SELECT udger_crawler_list.id as botid,name,ver,ver_major,last_seen,respect_robotstxt,family,family_code,family_homepage,family_icon,vendor,vendor_code,vendor_homepage,crawler_classification,crawler_classification_code
              FROM udger_crawler_list
              LEFT JOIN udger_crawler_class ON udger_crawler_class.id=udger_crawler_list.class_id
              WHERE ua_string=?"
      db.execute(query, [ua_string])
    end

    def format_crawler_data(result)
      @client_class_id = 99
      object.ua_class = 'Crawler'
      object.ua_class_code = 'crawler'
      copy_columns(CRAWLER_COLUMNS, result)
      object.ua_family_info_url = "#{INFO_URL}/bot-detail?bot=#{result['family']}#id#{result['botid']}"
    end

    def parse_client
      query = "SELECT class_id,client_id,regstring,name,name_code,homepage,icon,icon_big,engine,vendor,vendor_code,vendor_homepage,uptodate_current_version,client_classification,client_classification_code
              FROM udger_client_regex
              JOIN udger_client_list ON udger_client_list.id=udger_client_regex.client_id
              JOIN udger_client_class ON udger_client_class.id=udger_client_list.class_id
              ORDER BY sequence ASC"

      regexp_parse(query) do |match, result|
        @client_id = result['client_id']
        @client_class_id = result['class_id']
        copy_columns(CLIENT_COLUMNS, result)

        # With capture groups, scan returns arrays and the first group is
        # the version; it is nil when that group did not participate.
        version = match[0][0] if match[0].is_a?(Array)
        if version
          object.ua = "#{result['name']} #{version}"
          object.ua_version = version
          object.ua_version_major = version.split('.')[0]
        else
          object.ua = result['name']
        end
        object.ua_family_info_url = "#{INFO_URL}/browser-detail?browser=#{result['name']}"
      end
    end

    def parse_os
      query = "SELECT os_id,regstring,family,family_code,name,name_code,homepage,icon,icon_big,vendor,vendor_code,vendor_homepage
              FROM udger_os_regex
              JOIN udger_os_list ON udger_os_list.id=udger_os_regex.os_id
              ORDER BY sequence ASC"

      regexp_parse(query) { |_match, result| assign_os(result) }
    end

    # Fallback: when no OS pattern matched but a client was detected, use
    # the OS related to that client.
    def parse_client_os
      return if !@os_id.zero? || @client_id.zero?

      query = 'SELECT os_id,family,family_code,name,name_code,homepage,icon,icon_big,vendor,vendor_code,vendor_homepage
              FROM udger_client_os_relation
              JOIN udger_os_list ON udger_os_list.id=udger_client_os_relation.os_id
              WHERE client_id=?'

      data = db.execute(query, [@client_id])
      return if data.empty?

      assign_os(data[0])
    end

    def parse_device
      query = 'SELECT deviceclass_id,regstring,name,name_code,icon,icon_big
              FROM udger_deviceclass_regex
              JOIN udger_deviceclass_list ON udger_deviceclass_list.id=udger_deviceclass_regex.deviceclass_id
              ORDER BY sequence ASC'

      regexp_parse(query) { |_match, result| assign_device_class(result) }

      # The fallback below needs the client class, so detect the client now
      # if that service was switched off and nothing set the class yet.
      parse_client if @client_class_id == -1 && !@match_client

      return unless @deviceclass_id.zero? && @client_class_id != -1

      query = 'SELECT deviceclass_id,name,name_code,icon,icon_big
              FROM udger_deviceclass_list
              JOIN udger_client_class ON udger_client_class.deviceclass_id=udger_deviceclass_list.id
              WHERE udger_client_class.id=?'
      data = db.execute(query, [@client_class_id])
      assign_device_class(data[0]) unless data.empty?
    end

    # Looks up the device market name and brand. Patterns for the detected
    # OS are tried in sequence order until one of them captures a code that
    # is present in the device list.
    def devise_market_name
      return unless object.os_family_code

      query = "SELECT id,regstring FROM udger_devicename_regex
              WHERE os_family_code=? AND (os_code='-all-' OR os_code=?)
              ORDER BY sequence"
      db.execute(query, [object.os_family_code, object.os_code]) do |row|
        match = ua_string.scan(regexp(row['regstring']))
        next if match.empty?

        # First capture group of the first match (or the whole match when
        # the pattern has no groups).
        code = Array(match[0]).first
        next if code.nil?

        break if assign_device_brand(row['id'], code)
      end
    end

    # Copies the mapped columns of +row+ into #object.
    def copy_columns(columns, row)
      columns.each do |attribute, column|
        object.public_send("#{attribute}=", row[column])
      end
    end

    # Records an operating system row in #object.
    def assign_os(result)
      @os_id = result['os_id']
      copy_columns(OS_COLUMNS, result)
      object.os_info_url = "#{INFO_URL}/os-detail?os=#{result['name']}"
    end

    # Records a device class row in #object.
    def assign_device_class(result)
      @deviceclass_id = result['deviceclass_id']
      copy_columns(DEVICE_CLASS_COLUMNS, result)
      object.device_class_info_url = "#{INFO_URL}/device-detail?device=#{result['name']}"
    end

    # Looks up the device with the given code for one name pattern and
    # records it in #object.
    #
    # @return [Boolean] whether a device was found
    def assign_device_brand(regex_id, code)
      query = "SELECT marketname,brand_code,brand,brand_url,icon,icon_big
              FROM udger_devicename_list
              JOIN udger_devicename_brand ON udger_devicename_brand.id=udger_devicename_list.brand_id
              WHERE regex_id=? and code = ? COLLATE NOCASE"
      rows = db.execute(query, [regex_id, code])
      return false if rows.empty?

      brand = rows[0]
      copy_columns(DEVICE_BRAND_COLUMNS, brand)
      object.device_brand_info_url = "#{INFO_URL}/devices-brand-detail?brand=#{brand['brand_code']}"
      true
    end
  end
end
|
data/udger.gemspec
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'udger/version'

Gem::Specification.new do |spec|
  spec.name          = 'udger'
  spec.version       = Udger::VERSION
  spec.authors       = ['Bojan Milosavljevic']
  spec.email         = ['milboj@gmail.com']

  spec.summary       = 'Udger user agent library'
  spec.description   = 'Udger user agent library.'
  spec.homepage      = 'https://github.com/TowerData/udger-ruby'
  spec.license       = 'MIT'

  # No 'allowed_push_host' metadata is set, so the gem may be pushed to any
  # host.

  # Package every file tracked by git except tests, specs and features.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # Runtime dependencies.
  { 'sqlite3' => '~> 1.3', 'lru_redux' => '~> 1.1' }.each do |gem_name, requirement|
    spec.add_dependency gem_name, requirement
  end

  # Development-only dependencies.
  {
    'bundler' => '~> 1.14',
    'simplecov' => '~> 0.14',
    'rake' => '~> 10.0',
    'rspec' => '~> 3.0'
  }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end
|
metadata
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: udger
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Bojan Milosavljevic
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-07-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: sqlite3
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: lru_redux
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.1'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.1'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.14'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.14'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: simplecov
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.14'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.14'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '10.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '10.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '3.0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '3.0'
|
97
|
+
description: Udger user agent library.
|
98
|
+
email:
|
99
|
+
- milboj@gmail.com
|
100
|
+
executables: []
|
101
|
+
extensions: []
|
102
|
+
extra_rdoc_files: []
|
103
|
+
files:
|
104
|
+
- ".gitignore"
|
105
|
+
- ".rspec"
|
106
|
+
- ".travis.yml"
|
107
|
+
- Gemfile
|
108
|
+
- MIT-LICENSE
|
109
|
+
- README.md
|
110
|
+
- Rakefile
|
111
|
+
- bin/console
|
112
|
+
- bin/setup
|
113
|
+
- lib/udger.rb
|
114
|
+
- lib/udger/base_parser.rb
|
115
|
+
- lib/udger/ip_parser.rb
|
116
|
+
- lib/udger/object.rb
|
117
|
+
- lib/udger/parser.rb
|
118
|
+
- lib/udger/ua_parser.rb
|
119
|
+
- lib/udger/version.rb
|
120
|
+
- udger.gemspec
|
121
|
+
homepage: https://github.com/TowerData/udger-ruby
|
122
|
+
licenses:
|
123
|
+
- MIT
|
124
|
+
metadata: {}
|
125
|
+
post_install_message:
|
126
|
+
rdoc_options: []
|
127
|
+
require_paths:
|
128
|
+
- lib
|
129
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - ">="
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
requirements: []
|
140
|
+
rubyforge_project:
|
141
|
+
rubygems_version: 2.5.2
|
142
|
+
signing_key:
|
143
|
+
specification_version: 4
|
144
|
+
summary: Udger user agent library
|
145
|
+
test_files: []
|