sitemaps_parser 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.circleci/config.yml +75 -0
- data/CHANGELOG.md +19 -0
- data/LICENSE.md +31 -0
- data/README.md +3 -2
- data/lib/sitemaps.rb +15 -12
- data/lib/sitemaps/parser.rb +8 -4
- data/lib/sitemaps/version.rb +1 -1
- data/sitemaps.gemspec +18 -16
- metadata +36 -17
- data/.travis.yml +0 -4
- data/LICENSE.txt +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: dd9e770cf06954a14d869600cb2c6281c7ee993f2d29d628abb1cf5a5b4809f1
|
4
|
+
data.tar.gz: bea190bd461d7eec0e72f092ae56546dce1b4fc2568aa2dd4757abf9b4675067
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4ee0f7fcd11e03a9f1a031088a0171b6b13e6a2c6bdbba1b9aac1232fe526a1947f7bcabf402714b071efa6fee5561fb413e154b8558d87d77d39fea8932117
|
7
|
+
data.tar.gz: 6bfee33a31ecb2680eabe09a9bf39ca19fb8c72e2f45af78275217f9898793039090c7352c7fba0b041331a15ef74fe2540ce200980168b2dfaf346ee24dc1f1
|
@@ -0,0 +1,75 @@
|
|
1
|
+
version: 2.1
|
2
|
+
|
3
|
+
executors:
|
4
|
+
test_executor:
|
5
|
+
working_directory: ~/sitemaps
|
6
|
+
|
7
|
+
docker:
|
8
|
+
- image: circleci/ruby:${RUBY_VERSION}
|
9
|
+
|
10
|
+
jobs:
|
11
|
+
build:
|
12
|
+
environment:
|
13
|
+
RUBY_VERSION: << parameters.ruby_version >>
|
14
|
+
ACTIVESUPPORT_VERSION: << parameters.activesupport_version >>
|
15
|
+
executor: test_executor
|
16
|
+
parameters:
|
17
|
+
ruby_version:
|
18
|
+
type: string
|
19
|
+
activesupport_version:
|
20
|
+
type: string
|
21
|
+
default: '~> 5.0'
|
22
|
+
steps:
|
23
|
+
- checkout
|
24
|
+
|
25
|
+
- restore_cache:
|
26
|
+
keys:
|
27
|
+
- sitemaps-cache-v1-{{ arch }}-{{ .Branch }}-{{ .Revision }}
|
28
|
+
- sitemaps-cache-v1-{{ arch }}-{{ .Branch }}
|
29
|
+
- sitemaps-cache-v1
|
30
|
+
|
31
|
+
- run:
|
32
|
+
name: Bundle Install
|
33
|
+
command: |
|
34
|
+
bundle check --path=vendor/bundle || bundle install --clean --path vendor/bundle
|
35
|
+
|
36
|
+
- save_cache:
|
37
|
+
key: sitemaps-cache-v1-{{ arch }}-{{ .Branch }}-{{ .Revision }}
|
38
|
+
paths:
|
39
|
+
- vendor/bundle
|
40
|
+
|
41
|
+
- run:
|
42
|
+
name: RSpec
|
43
|
+
command: bundle exec rspec spec
|
44
|
+
|
45
|
+
# Once lower-level offenses are resolved, the fail-level flags should be removed
|
46
|
+
# so that any offense will cause the build to fail.
|
47
|
+
- run:
|
48
|
+
name: Rubocop
|
49
|
+
command: bundle exec rubocop --fail-level warning --display-only-fail-level-offenses
|
50
|
+
|
51
|
+
workflows:
|
52
|
+
build_and_test:
|
53
|
+
jobs:
|
54
|
+
- build:
|
55
|
+
name: 'ruby 2.3.8'
|
56
|
+
ruby_version: 2.3.8
|
57
|
+
- build:
|
58
|
+
name: 'ruby 2.4.6'
|
59
|
+
ruby_version: 2.4.6
|
60
|
+
- build:
|
61
|
+
name: 'ruby 2.5.5'
|
62
|
+
ruby_version: 2.5.5
|
63
|
+
- build:
|
64
|
+
name: 'ruby 2.6.3, activesupport 4'
|
65
|
+
ruby_version: 2.6.3
|
66
|
+
activesupport_version: '4.0.0'
|
67
|
+
- build:
|
68
|
+
name: 'ruby 2.6.3, activesupport 5'
|
69
|
+
ruby_version: 2.6.3
|
70
|
+
activesupport_version: '~> 5.0'
|
71
|
+
- build:
|
72
|
+
name: 'ruby 2.6.3, activesupport 6'
|
73
|
+
ruby_version: 2.6.3
|
74
|
+
activesupport_version: '6.0.0.rc1'
|
75
|
+
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Changelog
|
2
|
+
All notable changes to this project will be documented in this file.
|
3
|
+
|
4
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
5
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
6
|
+
|
7
|
+
## [Unreleased]
|
8
|
+
|
9
|
+
## [0.2.3] - 2019-06-19
|
10
|
+
|
11
|
+
### Changed
|
12
|
+
- [Run specs on CircleCi instead of Travis](https://github.com/GSA/sitemaps/commit/cd999ecbc6bcee36c20553c2d01b3bf1a24f4fad)
|
13
|
+
- [Update gem metadata](https://github.com/GSA/sitemaps/commit/67603d415af3deb989eb2d6f85be2f64461a9be5)
|
14
|
+
- [Require 100% code coverage](https://github.com/GSA/sitemaps/commit/032804a6476109150f10672e623b3412eb9dda11)
|
15
|
+
- [Depend on activesupport >4, < 7](https://github.com/GSA/sitemaps/commit/02ff67f8e4e5470942105b5dbe0b90bbdbaa5176)
|
16
|
+
|
17
|
+
### Fixed
|
18
|
+
- [Parse sitemaps including extra whitespace](https://github.com/GSA/sitemaps/commit/1ed7a427eb21a4a37d41b9cbfdfd81107e109c76)
|
19
|
+
- [Discover commented sitemaps in robots.txt](https://github.com/GSA/sitemaps/commit/2d3bd84140dc0df15112eb1e4f1f3b27d2ac6224)
|
data/LICENSE.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
As a work of the United States government, this project is in the
|
2
|
+
public domain within the United States.
|
3
|
+
|
4
|
+
Additionally, we waive copyright and related rights in the work
|
5
|
+
worldwide through the CC0 1.0 Universal public domain dedication.
|
6
|
+
|
7
|
+
## CC0 1.0 Universal summary
|
8
|
+
|
9
|
+
This is a human-readable summary of the [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode).
|
10
|
+
|
11
|
+
### No copyright
|
12
|
+
|
13
|
+
The person who associated a work with this deed has dedicated the work to
|
14
|
+
the public domain by waiving all rights to the work worldwide
|
15
|
+
under copyright law, including all related and neighboring rights, to the
|
16
|
+
extent allowed by law.
|
17
|
+
|
18
|
+
You can copy, modify, distribute and perform the work, even for commercial
|
19
|
+
purposes, all without asking permission.
|
20
|
+
|
21
|
+
### Other information
|
22
|
+
|
23
|
+
In no way are the patent or trademark rights of any person affected by CC0,
|
24
|
+
nor are the rights that other persons may have in the work or in how the
|
25
|
+
work is used, such as publicity or privacy rights.
|
26
|
+
|
27
|
+
Unless expressly stated otherwise, the person who associated a work with
|
28
|
+
this deed makes no warranties about the work, and disclaims liability for
|
29
|
+
all uses of the work, to the fullest extent permitted by applicable law.
|
30
|
+
When using or citing the work, you should not imply endorsement by the
|
31
|
+
author or the affirmer.
|
data/README.md
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
# Sitemaps
|
2
2
|
|
3
3
|
[![Gem](https://img.shields.io/gem/v/sitemaps_parser.svg?style=flat-square)](https://rubygems.org/gems/sitemaps_parser)
|
4
|
+
[![CircleCI](https://circleci.com/gh/GSA/sitemaps.svg?style=svg)](https://circleci.com/gh/GSA/sitemaps)
|
4
5
|
|
5
6
|
Discover, retrieve and parse XML sitemaps, according to the spec at [sitemaps.org](http://sitemaps.org).
|
6
7
|
|
7
|
-
See [RDOC Documentation](
|
8
|
+
See [RDOC Documentation](https://gsa.github.io/sitemaps) for detailed documentation.
|
8
9
|
|
9
10
|
## Installation
|
10
11
|
|
@@ -56,7 +57,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
56
57
|
|
57
58
|
## Contributing
|
58
59
|
|
59
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
60
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/GSA/sitemaps. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
60
61
|
|
61
62
|
## License
|
62
63
|
|
data/lib/sitemaps.rb
CHANGED
@@ -1,14 +1,15 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require 'active_support'
|
2
|
+
require 'active_support/core_ext/object/try'
|
3
|
+
require 'active_support/core_ext/object/blank'
|
4
4
|
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
5
|
+
require 'set'
|
6
|
+
require 'time'
|
7
|
+
require 'rexml/document'
|
8
|
+
require 'net/http'
|
8
9
|
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
10
|
+
require 'sitemaps/version'
|
11
|
+
require 'sitemaps/parser'
|
12
|
+
require 'sitemaps/fetcher'
|
12
13
|
|
13
14
|
# Discover, fetch and parse XML sitemaps as defined by the `http://sitemaps.org` spec.
|
14
15
|
module Sitemaps
|
@@ -168,11 +169,13 @@ module Sitemaps
|
|
168
169
|
def discover_roots(url, fetcher)
|
169
170
|
robots = begin
|
170
171
|
robotsurl = url.clone
|
171
|
-
robotsurl.path =
|
172
|
+
robotsurl.path = '/robots.txt'
|
172
173
|
robotstxt = fetcher.call(robotsurl)
|
173
174
|
|
174
|
-
discovered = robotstxt.scan(/^Sitemap: (
|
175
|
-
|
175
|
+
discovered = robotstxt.scan(/^Sitemap: (\S+)/).flatten.map do |url|
|
176
|
+
URI.parse(url.strip)
|
177
|
+
end
|
178
|
+
discovered.presence
|
176
179
|
rescue
|
177
180
|
nil
|
178
181
|
end
|
data/lib/sitemaps/parser.rb
CHANGED
@@ -40,29 +40,33 @@ module Sitemaps
|
|
40
40
|
# @api private
|
41
41
|
# @private
|
42
42
|
def self.parse_loc(root)
|
43
|
-
loc =
|
43
|
+
loc = get_text(root, 'loc')
|
44
44
|
loc && URI.parse(loc) rescue nil
|
45
45
|
end
|
46
46
|
|
47
47
|
# @api private
|
48
48
|
# @private
|
49
49
|
def self.parse_lastmod(root)
|
50
|
-
mod =
|
50
|
+
mod = get_text(root, 'lastmod')
|
51
51
|
mod && Time.parse(mod) rescue nil
|
52
52
|
end
|
53
53
|
|
54
54
|
# @api private
|
55
55
|
# @private
|
56
56
|
def self.parse_changefreq(root)
|
57
|
-
freq =
|
57
|
+
freq = get_text(root, 'changefreq')
|
58
58
|
freq && VALID_CHANGEFREQ.include?(freq) ? freq.to_sym : nil
|
59
59
|
end
|
60
60
|
|
61
61
|
# @api private
|
62
62
|
# @private
|
63
63
|
def self.parse_priority(root)
|
64
|
-
priority =
|
64
|
+
priority = get_text(root, 'priority') || '0.5'
|
65
65
|
priority && Float(priority) rescue 0.5 # default priority according to spec
|
66
66
|
end
|
67
|
+
|
68
|
+
def self.get_text(root, key)
|
69
|
+
root.get_text(key)&.value&.strip
|
70
|
+
end
|
67
71
|
end
|
68
72
|
end
|
data/lib/sitemaps/version.rb
CHANGED
data/sitemaps.gemspec
CHANGED
@@ -4,29 +4,31 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
require 'sitemaps/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
7
|
+
spec.name = 'sitemaps_parser'
|
8
8
|
spec.version = Sitemaps::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
9
|
+
spec.authors = ['Jonathan Raphaelson']
|
10
|
+
spec.email = ['jraphaelson@termscout.com']
|
11
11
|
|
12
|
-
spec.summary =
|
13
|
-
spec.homepage =
|
14
|
-
spec.license =
|
12
|
+
spec.summary = 'Retrieve and parse sitemaps, according to the sitemaps.org spec.'
|
13
|
+
spec.homepage = 'http://github.com/GSA/sitemaps'
|
14
|
+
spec.license = 'CC0 1.0 Universal'
|
15
15
|
|
16
16
|
files = `git ls-files -z`.split("\x0")
|
17
17
|
files.reject! { |f| f.match(%r{^(test|spec|features)/}) }
|
18
18
|
|
19
19
|
spec.files = files
|
20
|
-
spec.require_paths = [
|
20
|
+
spec.require_paths = ['lib']
|
21
21
|
|
22
|
-
spec.add_development_dependency
|
23
|
-
spec.add_development_dependency
|
24
|
-
spec.add_development_dependency
|
25
|
-
spec.add_development_dependency
|
26
|
-
spec.add_development_dependency
|
27
|
-
spec.add_development_dependency
|
28
|
-
spec.add_development_dependency
|
29
|
-
spec.add_development_dependency
|
22
|
+
spec.add_development_dependency 'bundler', '~> 1'
|
23
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
24
|
+
spec.add_development_dependency 'rspec', '~> 3.0'
|
25
|
+
spec.add_development_dependency 'webmock', '~> 3.0'
|
26
|
+
spec.add_development_dependency 'vcr', '~> 3'
|
27
|
+
spec.add_development_dependency 'rubocop', '~> 0.71.0'
|
28
|
+
spec.add_development_dependency 'byebug', '~> 8.2'
|
29
|
+
spec.add_development_dependency 'yard', '~> 0.9.11'
|
30
|
+
spec.add_development_dependency 'simplecov', '~> 0.16'
|
30
31
|
|
31
|
-
spec.add_runtime_dependency
|
32
|
+
spec.add_runtime_dependency 'activesupport',
|
33
|
+
(ENV['ACTIVESUPPORT_VERSION'] || ['>= 4', '< 7'])
|
32
34
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemaps_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Raphaelson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-06-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '3.0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
68
|
+
version: '3.0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: vcr
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -86,14 +86,14 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.
|
89
|
+
version: 0.71.0
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0.
|
96
|
+
version: 0.71.0
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: byebug
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,28 +114,48 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version:
|
117
|
+
version: 0.9.11
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version:
|
124
|
+
version: 0.9.11
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
126
|
+
name: simplecov
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0.16'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0.16'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: activesupport
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
130
144
|
- !ruby/object:Gem::Version
|
131
145
|
version: '4'
|
146
|
+
- - "<"
|
147
|
+
- !ruby/object:Gem::Version
|
148
|
+
version: '7'
|
132
149
|
type: :runtime
|
133
150
|
prerelease: false
|
134
151
|
version_requirements: !ruby/object:Gem::Requirement
|
135
152
|
requirements:
|
136
|
-
- - "
|
153
|
+
- - ">="
|
137
154
|
- !ruby/object:Gem::Version
|
138
155
|
version: '4'
|
156
|
+
- - "<"
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '7'
|
139
159
|
description:
|
140
160
|
email:
|
141
161
|
- jraphaelson@termscout.com
|
@@ -143,13 +163,14 @@ executables: []
|
|
143
163
|
extensions: []
|
144
164
|
extra_rdoc_files: []
|
145
165
|
files:
|
166
|
+
- ".circleci/config.yml"
|
146
167
|
- ".gitignore"
|
147
168
|
- ".rspec"
|
148
169
|
- ".rubocop.yml"
|
149
|
-
-
|
170
|
+
- CHANGELOG.md
|
150
171
|
- CODE_OF_CONDUCT.md
|
151
172
|
- Gemfile
|
152
|
-
- LICENSE.
|
173
|
+
- LICENSE.md
|
153
174
|
- README.md
|
154
175
|
- Rakefile
|
155
176
|
- bin/console
|
@@ -159,9 +180,9 @@ files:
|
|
159
180
|
- lib/sitemaps/parser.rb
|
160
181
|
- lib/sitemaps/version.rb
|
161
182
|
- sitemaps.gemspec
|
162
|
-
homepage: http://github.com/
|
183
|
+
homepage: http://github.com/GSA/sitemaps
|
163
184
|
licenses:
|
164
|
-
-
|
185
|
+
- CC0 1.0 Universal
|
165
186
|
metadata: {}
|
166
187
|
post_install_message:
|
167
188
|
rdoc_options: []
|
@@ -178,10 +199,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
199
|
- !ruby/object:Gem::Version
|
179
200
|
version: '0'
|
180
201
|
requirements: []
|
181
|
-
|
182
|
-
rubygems_version: 2.2.2
|
202
|
+
rubygems_version: 3.0.3
|
183
203
|
signing_key:
|
184
204
|
specification_version: 4
|
185
205
|
summary: Retrieve and parse sitemaps, according to the sitemaps.org spec.
|
186
206
|
test_files: []
|
187
|
-
has_rdoc:
|
data/.travis.yml
DELETED
data/LICENSE.txt
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
The MIT License (MIT)
|
2
|
-
|
3
|
-
Copyright (c) 2016 TermScout
|
4
|
-
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
7
|
-
in the Software without restriction, including without limitation the rights
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
10
|
-
furnished to do so, subject to the following conditions:
|
11
|
-
|
12
|
-
The above copyright notice and this permission notice shall be included in
|
13
|
-
all copies or substantial portions of the Software.
|
14
|
-
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
-
THE SOFTWARE.
|