udger 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/MIT-LICENSE +20 -0
- data/README.md +130 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/udger.rb +6 -0
- data/lib/udger/base_parser.rb +27 -0
- data/lib/udger/ip_parser.rb +107 -0
- data/lib/udger/object.rb +74 -0
- data/lib/udger/parser.rb +56 -0
- data/lib/udger/ua_parser.rb +221 -0
- data/lib/udger/version.rb +3 -0
- data/udger.gemspec +40 -0
- metadata +145 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 28464c9d0822abeeb3455da475fe10ea832692fc
|
4
|
+
data.tar.gz: aee465243f6064ada0d14e6616afc88133f0f19f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 653c0d6ae1fc9ed506c43567a286189e2dbfa837e05961cda06b65ac318a1a943d577c8521ba28c8ee4b850f3328d9170484c2162053d847a8ee556319875775
|
7
|
+
data.tar.gz: de56778029ecb0e13899d88e30893190e377b8f6c741afe919b0e836a5f36b2f409f70499148b576c609300602a7b5a2bb0fb7a2f350dc547772cb238536a723
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2017 TowerData
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
# Udger
|
2
|
+
|
3
|
+
## Installation
|
4
|
+
|
5
|
+
Add this line to your application's Gemfile:
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
gem 'udger'
|
9
|
+
```
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install udger
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
User agent and IP address parser for the Udger database.
|
22
|
+
|
23
|
+
|
24
|
+
require 'udger'
|
25
|
+
parser = Udger::Parser.new('path_to_udger_db', options) # First argument is the directory containing the database file, which must be named 'udgerdb_v3.dat'
|
26
|
+
|
27
|
+
|
28
|
+
### Options
|
29
|
+
|
30
|
+
- cache - defaults to true. Enables caching of results; only available for user agent parsing.
|
31
|
+
- lru_cache_size - defaults to 10,000. The maximum number of results kept in the cache.
|
32
|
+
- ua_services - if you do not need all the information for a user agent, you can specify which services to run. Available services are: [:crawler, :client, :os, :device, :device_market]. Reducing the number of services improves performance.
|
33
|
+
|
34
|
+
|
35
|
+
### Parsing user agent
|
36
|
+
|
37
|
+
parser.parse_ua('Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0')
|
38
|
+
|
39
|
+
This returns a struct with the following fields. Fields for which no data is available are nil.
|
40
|
+
|
41
|
+
- ua_class
|
42
|
+
- ua_class_code
|
43
|
+
- ua
|
44
|
+
- ua_version
|
45
|
+
- ua_version_major
|
46
|
+
- ua_uptodate_current_version
|
47
|
+
- ua_family
|
48
|
+
- ua_family_code
|
49
|
+
- ua_family_homepage
|
50
|
+
- ua_family_vendor
|
51
|
+
- ua_family_vendor_code
|
52
|
+
- ua_family_vendor_homepage
|
53
|
+
- ua_family_icon
|
54
|
+
- ua_family_icon_big
|
55
|
+
- ua_family_info_url
|
56
|
+
- ua_engine
|
57
|
+
- os
|
58
|
+
- os_code
|
59
|
+
- os_homepage
|
60
|
+
- os_icon
|
61
|
+
- os_icon_big
|
62
|
+
- os_info_url
|
63
|
+
- os_family
|
64
|
+
- os_family_code
|
65
|
+
- os_family_vendor
|
66
|
+
- os_family_vendor_code
|
67
|
+
- os_family_vendor_homepage
|
68
|
+
- device_class
|
69
|
+
- device_class_code
|
70
|
+
- device_class_icon
|
71
|
+
- device_class_icon_big
|
72
|
+
- device_class_info_url
|
73
|
+
- device_marketname
|
74
|
+
- device_brand
|
75
|
+
- device_brand_code
|
76
|
+
- device_brand_homepage
|
77
|
+
- device_brand_icon
|
78
|
+
- device_brand_icon_big
|
79
|
+
- device_brand_info_url
|
80
|
+
- crawler_last_seen
|
81
|
+
- crawler_category
|
82
|
+
- crawler_category_code
|
83
|
+
- crawler_respect_robotstxt
|
84
|
+
|
85
|
+
### Parsing ip
|
86
|
+
|
87
|
+
parser.parse_ip('108.61.199.93')
|
88
|
+
parser.parse_ip('2a02:598:111::9')
|
89
|
+
|
90
|
+
|
91
|
+
This returns a struct with the following fields. Fields for which no data is available are nil.
|
92
|
+
|
93
|
+
- ip_ver
|
94
|
+
- ip_classification
|
95
|
+
- ip_classification_code
|
96
|
+
- ip_hostname
|
97
|
+
- ip_last_seen
|
98
|
+
- ip_country
|
99
|
+
- ip_country_code
|
100
|
+
- ip_city
|
101
|
+
- crawler_name
|
102
|
+
- crawler_ver
|
103
|
+
- crawler_ver_major
|
104
|
+
- crawler_family
|
105
|
+
- crawler_family_code
|
106
|
+
- crawler_family_homepage
|
107
|
+
- crawler_family_vendor
|
108
|
+
- crawler_family_vendor_code
|
109
|
+
- crawler_family_vendor_homepage
|
110
|
+
- crawler_family_icon
|
111
|
+
- crawler_family_info_url
|
112
|
+
- crawler_last_seen
|
113
|
+
- crawler_category
|
114
|
+
- crawler_category_code
|
115
|
+
- crawler_respect_robotstxt
|
116
|
+
- datacenter_name
|
117
|
+
- datacenter_name_code
|
118
|
+
- datacenter_homepage
|
119
|
+
|
120
|
+
|
121
|
+
## Development
|
122
|
+
|
123
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
124
|
+
|
125
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
126
|
+
|
127
|
+
## Contributing
|
128
|
+
|
129
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/TowerData/udger.
|
130
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "udger"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/udger.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
module Udger
  # Shared behaviour for the Udger parsers: keeps the database handle and
  # provides helpers for running the delimited regular expressions that are
  # read from database rows.
  class BaseParser
    attr_accessor :db

    # @param db [#execute] database handle; only +execute+ is called on it here
    def initialize(db)
      @db = db
    end

    protected

    # Builds a case-insensitive Regexp from a delimited pattern string.
    #
    # The pattern is the text between the first and the last "/" of +string+;
    # whatever follows the last "/" is discarded and the regexp is always
    # compiled with the ignore-case option.
    #
    # @param string [String] delimited pattern
    # @return [Regexp]
    def regexp(string)
      opening = string.index('/')
      closing = string.rindex('/')
      Regexp.new(string[(opening + 1)..(closing - 1)], Regexp::IGNORECASE)
    end

    # Runs +query+ and scans +ua_string+ with the 'regstring' of each returned
    # row, in row order. The first row whose pattern matches is yielded
    # together with the String#scan result, after which iteration stops.
    #
    # +ua_string+ is not defined in this class; it has to be provided by the
    # including subclass.
    #
    # @param query [String] SQL whose rows expose a 'regstring' column
    # @param _cache [Boolean] accepted for interface compatibility; unused
    # @yield [match, row] the scan result and the row that produced it
    def regexp_parse(query, _cache = true)
      db.execute(query) do |row|
        match = ua_string.scan(regexp(row['regstring']))
        next if match.empty?

        yield match, row
        break
      end
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'ipaddr'
|
2
|
+
module Udger
  # Looks up a single IP address in the Udger database and collects what is
  # found in an IpAddress struct, available through #object.
  class IpParser < BaseParser
    attr_accessor :db, :object, :ip

    # @param db database handle, handed to BaseParser
    # @param ip the address to look up; it is parsed with IPAddr when #parse runs
    def initialize(db, ip)
      super(db)
      @ip = ip
      @object = IpAddress.new
    end

    # Runs the lookup and fills #object. Does nothing when no ip was given.
    def parse
      return unless @ip

      object.ip = @ip
      parse_ip_object
    end

    # Fills the classification/crawler fields from udger_ip_list and then the
    # datacenter fields from the range table matching the address family.
    # Leaves #object untouched (apart from +ip+) when the address is invalid.
    def parse_ip_object
      ip_object = fetch_ip
      return if ip_object.nil?

      object.ip_ver = ip_object.ipv4? ? 4 : 6
      query = "SELECT udger_crawler_list.id as botid,ip_last_seen,ip_hostname,ip_country,ip_city,ip_country_code,ip_classification,ip_classification_code,
               name,ver,ver_major,last_seen,respect_robotstxt,family,family_code,family_homepage,family_icon,vendor,vendor_code,vendor_homepage,crawler_classification,crawler_classification_code
               FROM udger_ip_list
               JOIN udger_ip_class ON udger_ip_class.id=udger_ip_list.class_id
               LEFT JOIN udger_crawler_list ON udger_crawler_list.id=udger_ip_list.crawler_id
               LEFT JOIN udger_crawler_class ON udger_crawler_class.id=udger_crawler_list.class_id
               WHERE ip=? ORDER BY sequence"
      rows = db.execute(query, [ip_object.to_s])
      if rows.empty?
        object.ip_classification = 'Unrecognized'
        object.ip_classification_code = 'Unrecognized'
      else
        assign_ip_list_row(rows[0])
      end

      if object.ip_ver == 4
        ip4_format(ip_object)
      else
        ip6_format(ip_object)
      end
    end

    # Datacenter lookup for IPv4: the address is compared as one integer
    # against the iplong_from/iplong_to range columns.
    def ip4_format(ip_object)
      query = 'SELECT name,name_code,homepage
               FROM udger_datacenter_range
               JOIN udger_datacenter_list ON udger_datacenter_range.datacenter_id=udger_datacenter_list.id
               WHERE iplong_from <= ? AND iplong_to >= ?'
      ip_int = ip_object.to_i
      assign_datacenter(db.execute(query, [ip_int, ip_int]))
    end

    # Datacenter lookup for IPv6: every 16-bit group of the expanded address is
    # compared against its own iplong_fromN/iplong_toN column pair. Values are
    # passed as bind variables rather than interpolated into the SQL text.
    def ip6_format(ip_object)
      # IPAddr#to_string yields the fully expanded, colon-separated form.
      groups = ip_object.to_string.split(':').map { |group| group.to_i(16) }
      conditions = groups.each_index.map do |index|
        "iplong_from#{index} <= ? AND iplong_to#{index} >= ?"
      end
      query = "SELECT name,name_code,homepage
               FROM udger_datacenter_range6
               JOIN udger_datacenter_list ON udger_datacenter_range6.datacenter_id=udger_datacenter_list.id
               WHERE #{conditions.join(' AND ')}"
      binds = groups.flat_map { |value| [value, value] }
      assign_datacenter(db.execute(query, binds))
    end

    # @return [IPAddr, nil] the parsed address, or nil when @ip cannot be
    #   turned into an IPAddr (deliberate best-effort: any StandardError
    #   raised by IPAddr.new is treated as "not an address")
    def fetch_ip
      IPAddr.new @ip
    rescue
      nil
    end

    private

    # Copies the columns of one udger_ip_list result row into #object.
    def assign_ip_list_row(result)
      object.ip_classification = result['ip_classification']
      object.ip_classification_code = result['ip_classification_code']
      object.ip_last_seen = result['ip_last_seen']
      object.ip_hostname = result['ip_hostname']
      object.ip_country = result['ip_country']
      object.ip_country_code = result['ip_country_code']
      object.ip_city = result['ip_city']
      object.crawler_name = result['name']
      object.crawler_ver = result['ver']
      object.crawler_ver_major = result['ver_major']
      object.crawler_family = result['family']
      object.crawler_family_code = result['family_code']
      object.crawler_family_homepage = result['family_homepage']
      object.crawler_family_vendor = result['vendor']
      object.crawler_family_vendor_code = result['vendor_code']
      object.crawler_family_vendor_homepage = result['vendor_homepage']
      object.crawler_family_icon = result['family_icon']
      # The info URL is only set for addresses classified as crawlers.
      if result['ip_classification_code'] == 'crawler'
        object.crawler_family_info_url = "https://udger.com/resources/ua-list/bot-detail?bot=#{result['family']}#id#{result['botid']}"
      end
      object.crawler_last_seen = result['last_seen']
      object.crawler_category = result['crawler_classification']
      object.crawler_category_code = result['crawler_classification_code']
      object.crawler_respect_robotstxt = result['respect_robotstxt']
    end

    # Copies the first datacenter row, if any, into #object.
    def assign_datacenter(rows)
      return if rows.empty?

      result = rows[0]
      object.datacenter_name = result['name']
      object.datacenter_name_code = result['name_code']
      object.datacenter_homepage = result['homepage']
    end
  end
end
|
data/lib/udger/object.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
module Udger
  # Value holder for the outcome of a user agent lookup. Every member is nil
  # until a parser assigns it.
  UserAgent = Struct.new(
    *%i[
      ua_string
      ua_class ua_class_code
      ua ua_version ua_version_major ua_uptodate_current_version
      ua_family ua_family_code ua_family_homepage
      ua_family_vendor ua_family_vendor_code ua_family_vendor_homepage
      ua_family_icon ua_family_icon_big ua_family_info_url
      ua_engine
      os os_code os_homepage os_icon os_icon_big os_info_url
      os_family os_family_code
      os_family_vendor os_family_vendor_code os_family_vendor_homepage
      device_class device_class_code
      device_class_icon device_class_icon_big device_class_info_url
      device_marketname
      device_brand device_brand_code device_brand_homepage
      device_brand_icon device_brand_icon_big device_brand_info_url
      crawler_last_seen crawler_category crawler_category_code
      crawler_respect_robotstxt
    ]
  )

  # Value holder for the outcome of an IP address lookup. Every member is nil
  # until a parser assigns it.
  IpAddress = Struct.new(
    *%i[
      ip ip_ver
      ip_classification ip_classification_code
      ip_hostname ip_last_seen
      ip_country ip_country_code ip_city
      crawler_name crawler_ver crawler_ver_major
      crawler_family crawler_family_code crawler_family_homepage
      crawler_family_vendor crawler_family_vendor_code crawler_family_vendor_homepage
      crawler_family_icon crawler_family_info_url
      crawler_last_seen crawler_category crawler_category_code
      crawler_respect_robotstxt
      datacenter_name datacenter_name_code datacenter_homepage
    ]
  )
end
|
data/lib/udger/parser.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'sqlite3'
|
2
|
+
require 'lru_redux'
|
3
|
+
module Udger
  # Public entry point: opens the Udger database found in a directory and
  # exposes user agent and IP address lookups.
  class Parser
    DB_FILENAME = 'udgerdb_v3.dat'.freeze
    # Lookups that can be switched on individually through +ua_services+.
    UA_SERVICES = %i[crawler client os device device_market].freeze

    attr_reader :db
    attr_reader :data_dir
    attr_accessor :cache
    # Size the cache was created with. Assigning a new value afterwards does
    # not rebuild the cache, which is created once in #initialize.
    attr_accessor :lru_cache_size

    # @param data_dir [String, nil] directory containing DB_FILENAME; defaults
    #   to the directory of this source file
    # @param lru_cache_size [Integer] capacity handed to LruRedux::Cache
    # @param cache [Boolean] whether #parse_ua results are cached
    # @param ua_services [Array<Symbol>] subset of UA_SERVICES to run; an empty
    #   list enables all of them
    def initialize(data_dir = nil, lru_cache_size: 10_000, cache: true, ua_services: [])
      @data_dir = data_dir || __dir__
      @db = SQLite3::Database.new "#{@data_dir}/#{DB_FILENAME}"
      @db.results_as_hash = true
      @cache = cache
      # Previously never stored, so the lru_cache_size reader always gave nil.
      @lru_cache_size = lru_cache_size
      parse_ua_params(ua_services)
      # A plain Hash is only used when caching is off AND the size is zero;
      # in every other case a bounded LRU cache is created.
      @lru_cache = (lru_cache_size.zero? && !cache) ? {} : LruRedux::Cache.new(lru_cache_size)
    end

    # Parses a user agent string, serving repeated strings from the cache when
    # caching is enabled.
    #
    # The cache is keyed on the string itself rather than on String#hash, so
    # two different strings can never be served each other's result. Cached
    # results are shared objects: the same struct instance is returned on
    # every hit.
    #
    # @param ua_string [String, nil]
    # @return the result object built by UaParser
    def parse_ua(ua_string)
      return parse_ua_no_cache(ua_string) unless @cache

      cached = @lru_cache[ua_string]
      return cached if cached

      @lru_cache[ua_string] = parse_ua_no_cache(ua_string)
    end

    # Looks up an IP address. Results are not cached.
    #
    # @param ip [String, nil]
    # @return the result object built by IpParser
    def parse_ip(ip)
      parser = IpParser.new @db, ip
      parser.parse
      parser.object
    end

    private

    # Turns the requested service list into a flag per known service.
    def parse_ua_params(parse)
      enable_all = parse.empty?
      @match_ua = UA_SERVICES.each_with_object({}) do |service, flags|
        flags[service] = enable_all || parse.include?(service)
      end
    end

    def parse_ua_no_cache(ua_string)
      # The flags must be splatted: UaParser takes them as keyword arguments,
      # and Ruby 3 no longer converts a trailing positional Hash to keywords.
      parser = UaParser.new(@db, ua_string, **@match_ua)
      parser.parse
      parser.object
    end
  end
end
|
@@ -0,0 +1,221 @@
|
|
1
|
+
module Udger
  # Parses one user agent string against the Udger database and collects the
  # result in a UserAgent struct, available through #object.
  class UaParser < BaseParser
    attr_accessor :db, :ua_string, :object

    # Each keyword switches one lookup on (true) or off (false).
    #
    # @param db database handle, handed to BaseParser
    # @param ua_string [String, nil] the user agent string to parse
    def initialize(db, ua_string, crawler: true, client: true, os: true, device: true, device_market: true)
      super(db)

      @match_crawler = crawler
      @match_client = client
      @match_os = os
      @match_device = device
      @match_device_market = device_market

      @ua_string = ua_string
      @object = UserAgent.new
      # Ids of what has been recognised so far; 0 / -1 mean "nothing yet".
      @os_id = 0
      @client_id = 0
      @client_class_id = -1
      @deviceclass_id = 0
    end

    # Fills #object. A user agent that is listed as a crawler is described by
    # the crawler data only; otherwise the enabled client, OS, device and
    # device market name lookups run in that order.
    def parse
      return unless ua_string

      object.ua_string = ua_string
      crawler_data = @match_crawler ? parse_crawler : []
      unless crawler_data.empty?
        format_crawler_data crawler_data[0]
        return
      end

      parse_client if @match_client
      # The market name lookup depends on the OS codes, so the OS lookups also
      # run when only device_market is enabled.
      if @match_os || @match_device_market
        parse_os
        parse_client_os
      end
      parse_device if @match_device
      devise_market_name if @match_device_market
    end

    private

    # @return [Array] crawler rows whose ua_string equals the parsed string
    def parse_crawler
      query = "SELECT udger_crawler_list.id as botid,name,ver,ver_major,last_seen,respect_robotstxt,family,family_code,family_homepage,family_icon,vendor,vendor_code,vendor_homepage,crawler_classification,crawler_classification_code
               FROM udger_crawler_list
               LEFT JOIN udger_crawler_class ON udger_crawler_class.id=udger_crawler_list.class_id
               WHERE ua_string=?"
      db.execute(query, [ua_string])
    end

    # Copies one crawler row into #object and marks the client class as 99.
    def format_crawler_data(result)
      @client_class_id = 99
      @object.ua_class = 'Crawler'
      @object.ua_class_code = 'crawler'
      @object.ua = result['name']
      @object.ua_version = result['ver']
      @object.ua_version_major = result['ver_major']
      @object.ua_family = result['family']
      @object.ua_family_code = result['family_code']
      @object.ua_family_homepage = result['family_homepage']
      @object.ua_family_vendor = result['vendor']
      @object.ua_family_vendor_code = result['vendor_code']
      @object.ua_family_vendor_homepage = result['vendor_homepage']
      @object.ua_family_icon = result['family_icon']
      @object.ua_family_info_url = "https://udger.com/resources/ua-list/bot-detail?bot=#{result['family']}#id#{result['botid']}"
      @object.crawler_last_seen = result['last_seen']
      @object.crawler_category = result['crawler_classification']
      @object.crawler_category_code = result['crawler_classification_code']
      @object.crawler_respect_robotstxt = result['respect_robotstxt']
    end

    # Finds the first client regex matching the user agent and copies the
    # client data into #object.
    def parse_client
      query = "SELECT class_id,client_id,regstring,name,name_code,homepage,icon,icon_big,engine,vendor,vendor_code,vendor_homepage,uptodate_current_version,client_classification,client_classification_code
               FROM udger_client_regex
               JOIN udger_client_list ON udger_client_list.id=udger_client_regex.client_id
               JOIN udger_client_class ON udger_client_class.id=udger_client_list.class_id
               ORDER BY sequence ASC"

      regexp_parse(query) do |match, result|
        @client_id = result['client_id']
        @client_class_id = result['class_id']

        object.ua_class = result['client_classification']
        object.ua_class_code = result['client_classification_code']
        # With capture groups String#scan yields arrays; the first group is the
        # version. It is nil when the group did not take part in the match, in
        # which case only the client name is reported.
        version = match[0].is_a?(Array) ? match[0][0] : nil
        if version
          object.ua = "#{result['name']} #{version}"
          object.ua_version = version
          object.ua_version_major = version.split('.')[0]
        else
          object.ua = result['name']
        end
        object.ua_uptodate_current_version = result['uptodate_current_version']
        object.ua_family = result['name']
        object.ua_family_code = result['name_code']
        object.ua_family_homepage = result['homepage']
        object.ua_family_vendor = result['vendor']
        object.ua_family_vendor_code = result['vendor_code']
        object.ua_family_vendor_homepage = result['vendor_homepage']
        object.ua_family_icon = result['icon']
        object.ua_family_icon_big = result['icon_big']
        object.ua_family_info_url = "https://udger.com/resources/ua-list/browser-detail?browser=#{result['name']}"
        object.ua_engine = result['engine']
      end
    end

    # Finds the first OS regex matching the user agent.
    def parse_os
      query = "SELECT os_id,regstring,family,family_code,name,name_code,homepage,icon,icon_big,vendor,vendor_code,vendor_homepage
               FROM udger_os_regex
               JOIN udger_os_list ON udger_os_list.id=udger_os_regex.os_id
               ORDER BY sequence ASC"

      regexp_parse(query) { |_match, result| assign_os(result) }
    end

    # Fallback used when no OS regex matched but a client was recognised:
    # takes the OS related to that client.
    def parse_client_os
      return if !@os_id.zero? || @client_id.zero?

      query = 'SELECT os_id,family,family_code,name,name_code,homepage,icon,icon_big,vendor,vendor_code,vendor_homepage
               FROM udger_client_os_relation
               JOIN udger_os_list ON udger_os_list.id=udger_client_os_relation.os_id
               WHERE client_id=?'

      data = db.execute(query, [@client_id])
      assign_os(data[0]) unless data.empty?
    end

    # Finds the device class, first by regex and otherwise through the
    # device class tied to the client class.
    def parse_device
      query = 'SELECT deviceclass_id,regstring,name,name_code,icon,icon_big
               FROM udger_deviceclass_regex
               JOIN udger_deviceclass_list ON udger_deviceclass_list.id=udger_deviceclass_regex.deviceclass_id
               ORDER BY sequence ASC'

      regexp_parse(query) { |_match, result| assign_device_class(result) }

      # If there is no @client_class_id and @match_client is not enabled,
      # the client lookup is run here so the fallback below has a class id.
      parse_client if @client_class_id == -1 && !@match_client

      return unless @deviceclass_id.zero? && @client_class_id != -1

      query = 'SELECT deviceclass_id,name,name_code,icon,icon_big
               FROM udger_deviceclass_list
               JOIN udger_client_class ON udger_client_class.deviceclass_id=udger_deviceclass_list.id
               WHERE udger_client_class.id=?'
      data = db.execute(query, [@client_class_id])
      assign_device_class(data[0]) unless data.empty?
    end

    # Looks up the device market name and brand for the recognised OS.
    #
    # Only the first device name regex that matches the user agent is
    # considered. OS codes are passed as bind variables instead of being
    # concatenated into the SQL text, which also copes with a nil os_code.
    def devise_market_name
      family_code = object.os_family_code
      return unless family_code

      query = "SELECT id,regstring FROM udger_devicename_regex WHERE
               ((os_family_code=? AND os_code='-all-')
               OR
               (os_family_code=? AND os_code=?))
               ORDER BY sequence"
      db.execute(query, [family_code, family_code, object.os_code]) do |row|
        match = ua_string.scan(regexp(row['regstring']))
        next if match.empty?

        # With capture groups match[0] is an array of groups; the device code
        # is the first one. Without groups it is the matched text itself.
        assign_device_market(row['id'], Array(match[0]).first)
        break
      end
    end

    # Copies one OS row into #object and remembers its id.
    def assign_os(result)
      @os_id = result['os_id']
      object.os = result['name']
      object.os_code = result['name_code']
      object.os_homepage = result['homepage']
      object.os_icon = result['icon']
      object.os_icon_big = result['icon_big']
      object.os_info_url = "https://udger.com/resources/ua-list/os-detail?os=#{result['name']}"
      object.os_family = result['family']
      object.os_family_code = result['family_code']
      object.os_family_vendor = result['vendor']
      object.os_family_vendor_code = result['vendor_code']
      object.os_family_vendor_homepage = result['vendor_homepage']
    end

    # Copies one device class row into #object and remembers its id.
    def assign_device_class(result)
      @deviceclass_id = result['deviceclass_id']
      object.device_class = result['name']
      object.device_class_code = result['name_code']
      object.device_class_icon = result['icon']
      object.device_class_icon_big = result['icon_big']
      object.device_class_info_url = "https://udger.com/resources/ua-list/device-detail?device=#{result['name']}"
    end

    # Copies the market name and brand for the given regex id and device code
    # into #object; leaves #object untouched when no row is found.
    def assign_device_market(regex_id, code)
      sub_query = 'SELECT marketname,brand_code,brand,brand_url,icon,icon_big
                   FROM udger_devicename_list
                   JOIN udger_devicename_brand ON udger_devicename_brand.id=udger_devicename_list.brand_id
                   WHERE regex_id=? and code = ? COLLATE NOCASE'
      rows = db.execute(sub_query, [regex_id, code])
      return if rows.empty?

      res = rows[0]
      object.device_marketname = res['marketname']
      object.device_brand = res['brand']
      object.device_brand_code = res['brand_code']
      object.device_brand_homepage = res['brand_url']
      object.device_brand_icon = res['icon']
      object.device_brand_icon_big = res['icon_big']
      object.device_brand_info_url = "https://udger.com/resources/ua-list/devices-brand-detail?brand=#{res['brand_code']}"
    end
  end
end
|
data/udger.gemspec
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Make lib/ loadable so the version constant can be read below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'udger/version'

Gem::Specification.new do |gem|
  gem.name    = 'udger'
  gem.version = Udger::VERSION
  gem.authors = ['Bojan Milosavljevic']
  gem.email   = ['milboj@gmail.com']

  gem.summary     = 'Udger user agent library'
  gem.description = 'Udger user agent library.'
  gem.homepage    = 'https://github.com/TowerData/udger-ruby'
  gem.license     = 'MIT'

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata['allowed_push_host'] = 'http://mygemserver.com'
  # else
  #   raise 'RubyGems 2.0 or newer is required to protect against ' \
  #     'public gem pushes.'
  # end

  # Ship every file tracked by git except the test suites.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir = 'exe'
  gem.executables = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ['lib']

  # Runtime dependencies
  gem.add_dependency 'sqlite3', '~> 1.3'
  gem.add_dependency 'lru_redux', '~> 1.1'

  # Development dependencies
  gem.add_development_dependency 'bundler', '~> 1.14'
  gem.add_development_dependency 'simplecov', '~> 0.14'
  gem.add_development_dependency 'rake', '~> 10.0'
  gem.add_development_dependency 'rspec', '~> 3.0'
end
|
metadata
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: udger
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Bojan Milosavljevic
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-07-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: sqlite3
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: lru_redux
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.1'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.1'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.14'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.14'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: simplecov
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.14'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.14'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '10.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '10.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '3.0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '3.0'
|
97
|
+
description: Udger user agent library.
|
98
|
+
email:
|
99
|
+
- milboj@gmail.com
|
100
|
+
executables: []
|
101
|
+
extensions: []
|
102
|
+
extra_rdoc_files: []
|
103
|
+
files:
|
104
|
+
- ".gitignore"
|
105
|
+
- ".rspec"
|
106
|
+
- ".travis.yml"
|
107
|
+
- Gemfile
|
108
|
+
- MIT-LICENSE
|
109
|
+
- README.md
|
110
|
+
- Rakefile
|
111
|
+
- bin/console
|
112
|
+
- bin/setup
|
113
|
+
- lib/udger.rb
|
114
|
+
- lib/udger/base_parser.rb
|
115
|
+
- lib/udger/ip_parser.rb
|
116
|
+
- lib/udger/object.rb
|
117
|
+
- lib/udger/parser.rb
|
118
|
+
- lib/udger/ua_parser.rb
|
119
|
+
- lib/udger/version.rb
|
120
|
+
- udger.gemspec
|
121
|
+
homepage: https://github.com/TowerData/udger-ruby
|
122
|
+
licenses:
|
123
|
+
- MIT
|
124
|
+
metadata: {}
|
125
|
+
post_install_message:
|
126
|
+
rdoc_options: []
|
127
|
+
require_paths:
|
128
|
+
- lib
|
129
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - ">="
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
requirements: []
|
140
|
+
rubyforge_project:
|
141
|
+
rubygems_version: 2.5.2
|
142
|
+
signing_key:
|
143
|
+
specification_version: 4
|
144
|
+
summary: Udger user agent library
|
145
|
+
test_files: []
|