google_robotstxt_parser 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +28 -0
- data/.gitmodules +3 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/Gemfile +6 -0
- data/Guardfile +16 -0
- data/LICENSE +22 -0
- data/README.md +57 -0
- data/Rakefile +6 -0
- data/ext/robotstxt/.DS_Store +0 -0
- data/ext/robotstxt/extconf.rb +83 -0
- data/ext/robotstxt/robotstxt/.gitignore +1 -0
- data/ext/robotstxt/robotstxt/BUILD +40 -0
- data/ext/robotstxt/robotstxt/CMakeLists.txt +174 -0
- data/ext/robotstxt/robotstxt/CMakeLists.txt.in +30 -0
- data/ext/robotstxt/robotstxt/CONTRIBUTING.md +30 -0
- data/ext/robotstxt/robotstxt/LICENSE +203 -0
- data/ext/robotstxt/robotstxt/README.md +134 -0
- data/ext/robotstxt/robotstxt/WORKSPACE +28 -0
- data/ext/robotstxt/robotstxt/protocol-draft/README.md +9 -0
- data/ext/robotstxt/robotstxt/protocol-draft/draft-koster-rep-00.txt +529 -0
- data/ext/robotstxt/robotstxt/robots.cc +706 -0
- data/ext/robotstxt/robotstxt/robots.h +241 -0
- data/ext/robotstxt/robotstxt/robots_main.cc +101 -0
- data/ext/robotstxt/robotstxt/robots_test.cc +990 -0
- data/ext/robotstxt/robotstxt.cc +32 -0
- data/google_robotstxt_parser.gemspec +45 -0
- data/lib/google_robotstxt_parser/version.rb +6 -0
- data/lib/google_robotstxt_parser.rb +4 -0
- data/spec/google_robotstxt_parser_spec.rb +33 -0
- data/spec/spec_helper.rb +19 -0
- metadata +146 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include "robots.h"
|
3
|
+
#include <string>
|
4
|
+
|
5
|
+
typedef VALUE(ruby_method)(...);
|
6
|
+
|
7
|
+
extern "C"
|
8
|
+
{
|
9
|
+
static VALUE m_Robotstxt;
|
10
|
+
|
11
|
+
// Ruby binding around googlebot::RobotsMatcher::OneAgentAllowedByRobots.
//
// robots_content: Ruby String holding the raw robots.txt body.
// user_agent:     Ruby String naming the crawler to check.
// url:            Ruby String with the URL to test.
//
// Returns Qtrue when the matcher reports the URL as allowed for the agent,
// Qfalse otherwise. Check_Type raises a Ruby TypeError when any argument is
// not a String.
static VALUE robotstxt_allowed_by_robots(VALUE self, VALUE robots_content, VALUE user_agent, VALUE url)
{
  Check_Type(robots_content, T_STRING);
  Check_Type(user_agent, T_STRING);
  Check_Type(url, T_STRING);

  // Copy with the explicit byte length instead of treating the buffer as a
  // C string: a Ruby String may contain embedded NUL bytes and its buffer is
  // not guaranteed to be NUL-terminated, so relying on strlen() could
  // truncate the content or read past the end of the buffer.
  std::string rc(RSTRING_PTR(robots_content), RSTRING_LEN(robots_content));
  std::string ua(RSTRING_PTR(user_agent), RSTRING_LEN(user_agent));
  std::string ur(RSTRING_PTR(url), RSTRING_LEN(url));

  googlebot::RobotsMatcher matcher;
  bool result = matcher.OneAgentAllowedByRobots(rc, ua, ur);
  return result ? Qtrue : Qfalse;
}
|
25
|
+
|
26
|
+
void Init_robotstxt()
|
27
|
+
{
|
28
|
+
m_Robotstxt = rb_define_module("Robotstxt");
|
29
|
+
|
30
|
+
rb_define_method(m_Robotstxt, "allowed_by_robots", (ruby_method *)&robotstxt_allowed_by_robots, 3);
|
31
|
+
}
|
32
|
+
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
|
6
|
+
require 'google_robotstxt_parser/version'
|
7
|
+
|
8
|
+
# Gem specification for google_robotstxt_parser: a native extension that
# wraps the C++ robots.txt parser vendored as a git submodule under ext/.
Gem::Specification.new do |s|
  s.name        = 'google_robotstxt_parser'
  s.version     = GoogleRobotstxtParser::VERSION
  s.date        = '2019-11-25'
  s.summary     = 'Ruby gem wrapper around Google Robotstxt Parser library'
  s.description = 'This is a unofficial Ruby gem that provides a wrapper around [Google Robotstxt Parser C++ library]'
  s.authors     = ['Bastien Montois']
  s.email       = 'bastien.montois@la-revanche-des-sites.fr'
  s.homepage    = 'https://github.com/larevanchedessites/google-robotstxt-ruby'
  s.license     = 'MIT'

  # Package every file tracked by git in the gem repository itself.
  s.files = `git ls-files`.split("\n")

  # `git ls-files` does not descend into submodules, so list each submodule's
  # tracked files separately. `pwd` inside `git submodule foreach` yields the
  # absolute path of each submodule, one per line.
  gem_dir = "#{File.expand_path(__dir__)}/"
  # Split on newlines explicitly: `$\` (the output record separator) is nil
  # by default, which makes String#split break on any whitespace and mangles
  # paths that contain spaces.
  `git submodule --quiet foreach pwd`.split("\n").each do |submodule_path|
    Dir.chdir(submodule_path) do
      submodule_relative_path = submodule_path.sub(gem_dir, '')
      # Prefix each file with the submodule's path relative to the gem root.
      `git ls-files`.split("\n").each do |filename|
        s.files << "#{submodule_relative_path}/#{filename}"
      end
    end
  end

  s.require_paths = %w[lib ext]
  s.extensions    = ['ext/robotstxt/extconf.rb']

  s.add_development_dependency 'bundler', '~> 2.0'
  s.add_development_dependency 'rake', '~> 10.0'
  s.add_development_dependency 'rake-compiler', '~> 1.0'

  s.add_development_dependency 'guard-rspec', '~> 4.7'
  s.add_development_dependency 'rspec', '~> 3.0'
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'robotstxt'
|
4
|
+
|
5
|
+
RSpec.describe GoogleRobotstxtParser do
  # Mix the native module into the example group so examples can call
  # allowed_by_robots directly; the extension defines it as an instance
  # method of Robotstxt.
  include Robotstxt

  describe '.VERSION' do
    it 'returns a string' do
      expect(GoogleRobotstxtParser::VERSION).to be_instance_of String
    end
  end

  describe 'allowed_by_robots' do
    it 'returns true when the robots.txt has an empty Disallow rule' do
      robotstxt_content = "# robotstxt.org/\n\nUser-agent: *\nDisallow: \n\nSitemap: https://www.bqst.fr/sitemap.xml"
      user_agent = 'GoogleBot'
      url = 'https://www.bqst.fr'

      expect(allowed_by_robots(robotstxt_content, user_agent, url)).to eq(true)
    end

    it 'returns false when the robots.txt disallows every path' do
      robotstxt_content = "# robotstxt.org/\n\nUser-agent: *\nDisallow: /\n\nSitemap: https://www.bqst.fr/sitemap.xml"
      user_agent = 'GoogleBot'
      url = 'https://www.bqst.fr'

      expect(allowed_by_robots(robotstxt_content, user_agent, url)).to eq(false)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Start coverage tracking before the code under test is loaded.
require 'simplecov'
SimpleCov.start

require 'bundler/setup'
# Load the gem under test (this previously required an unrelated library,
# 'sound_io').
require 'google_robotstxt_parser'
|
8
|
+
|
9
|
+
# Global RSpec settings shared by every spec file.
RSpec.configure do |rspec|
  # Persist example status so --only-failures and --next-failure work.
  rspec.example_status_persistence_file_path = '.rspec_status'

  # Keep RSpec's DSL off `Module` and `main`.
  rspec.disable_monkey_patching!

  # Allow only the `expect` syntax.
  rspec.expect_with(:rspec) { |expectations| expectations.syntax = :expect }
end
|
metadata
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: google_robotstxt_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Bastien Montois
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-11-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake-compiler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: guard-rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '4.7'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '4.7'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '3.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '3.0'
|
83
|
+
description: This is a unofficial Ruby gem that provides a wrapper around [Google
|
84
|
+
Robotstxt Parser C++ library]
|
85
|
+
email: bastien.montois@la-revanche-des-sites.fr
|
86
|
+
executables: []
|
87
|
+
extensions:
|
88
|
+
- ext/robotstxt/extconf.rb
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- ".gitignore"
|
92
|
+
- ".gitmodules"
|
93
|
+
- CHANGELOG.md
|
94
|
+
- CODE_OF_CONDUCT.md
|
95
|
+
- Gemfile
|
96
|
+
- Guardfile
|
97
|
+
- LICENSE
|
98
|
+
- README.md
|
99
|
+
- Rakefile
|
100
|
+
- ext/robotstxt/.DS_Store
|
101
|
+
- ext/robotstxt/extconf.rb
|
102
|
+
- ext/robotstxt/robotstxt.cc
|
103
|
+
- ext/robotstxt/robotstxt/.gitignore
|
104
|
+
- ext/robotstxt/robotstxt/BUILD
|
105
|
+
- ext/robotstxt/robotstxt/CMakeLists.txt
|
106
|
+
- ext/robotstxt/robotstxt/CMakeLists.txt.in
|
107
|
+
- ext/robotstxt/robotstxt/CONTRIBUTING.md
|
108
|
+
- ext/robotstxt/robotstxt/LICENSE
|
109
|
+
- ext/robotstxt/robotstxt/README.md
|
110
|
+
- ext/robotstxt/robotstxt/WORKSPACE
|
111
|
+
- ext/robotstxt/robotstxt/protocol-draft/README.md
|
112
|
+
- ext/robotstxt/robotstxt/protocol-draft/draft-koster-rep-00.txt
|
113
|
+
- ext/robotstxt/robotstxt/robots.cc
|
114
|
+
- ext/robotstxt/robotstxt/robots.h
|
115
|
+
- ext/robotstxt/robotstxt/robots_main.cc
|
116
|
+
- ext/robotstxt/robotstxt/robots_test.cc
|
117
|
+
- google_robotstxt_parser.gemspec
|
118
|
+
- lib/google_robotstxt_parser.rb
|
119
|
+
- lib/google_robotstxt_parser/version.rb
|
120
|
+
- spec/google_robotstxt_parser_spec.rb
|
121
|
+
- spec/spec_helper.rb
|
122
|
+
homepage: https://github.com/larevanchedessites/google-robotstxt-ruby
|
123
|
+
licenses:
|
124
|
+
- MIT
|
125
|
+
metadata: {}
|
126
|
+
post_install_message:
|
127
|
+
rdoc_options: []
|
128
|
+
require_paths:
|
129
|
+
- lib
|
130
|
+
- ext
|
131
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: '0'
|
136
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
|
+
requirements:
|
138
|
+
- - ">="
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '0'
|
141
|
+
requirements: []
|
142
|
+
rubygems_version: 3.0.4
|
143
|
+
signing_key:
|
144
|
+
specification_version: 4
|
145
|
+
summary: Ruby gem wrapper around Google Robotstxt Parser library
|
146
|
+
test_files: []
|