google_robotstxt_parser 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
+ #include <ruby.h>
2
+ #include "robots.h"
3
+ #include <string>
4
+
5
+ typedef VALUE(ruby_method)(...);
6
+
7
+ extern "C"
8
+ {
9
+ static VALUE m_Robotstxt;
10
+
11
+ static VALUE robotstxt_allowed_by_robots(VALUE self, VALUE robots_content, VALUE user_agent, VALUE url)
12
+ {
13
+ Check_Type(robots_content, T_STRING);
14
+ Check_Type(user_agent, T_STRING);
15
+ Check_Type(url, T_STRING);
16
+
17
+ std::string rc = RSTRING_PTR(robots_content);
18
+ std::string ua = RSTRING_PTR(user_agent);
19
+ std::string ur = RSTRING_PTR(url);
20
+
21
+ googlebot::RobotsMatcher matcher;
22
+ bool result = matcher.OneAgentAllowedByRobots(rc, ua, ur);
23
+ return result ? Qtrue : Qfalse;
24
+ }
25
+
26
+ void Init_robotstxt()
27
+ {
28
+ m_Robotstxt = rb_define_module("Robotstxt");
29
+
30
+ rb_define_method(m_Robotstxt, "allowed_by_robots", (ruby_method *)&robotstxt_allowed_by_robots, 3);
31
+ }
32
+ }
@@ -0,0 +1,45 @@
1
# frozen_string_literal: true

lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'google_robotstxt_parser/version'

Gem::Specification.new do |s|
  s.name        = 'google_robotstxt_parser'
  s.version     = GoogleRobotstxtParser::VERSION
  s.date        = '2019-11-25'
  s.summary     = 'Ruby gem wrapper around Google Robotstxt Parser library'
  s.description = 'This is an unofficial Ruby gem that provides a wrapper around [Google Robotstxt Parser C++ library]'
  s.authors     = ['Bastien Montois']
  s.email       = 'bastien.montois@la-revanche-des-sites.fr'
  s.homepage    = 'https://github.com/larevanchedessites/google-robotstxt-ruby'
  s.license     = 'MIT'

  # All git-tracked files. (A previous Dir['lib/**/*.rb'] assignment was dead
  # code: it was immediately overwritten by this line, so it has been removed.)
  s.files = `git ls-files`.split("\n")

  # `git ls-files` does not descend into submodules, so walk each submodule
  # and append its files (relative to the gem root) to s.files.
  gem_dir = "#{File.expand_path(__dir__)}/"
  `git submodule --quiet foreach pwd`.split("\n").each do |submodule_path|
    Dir.chdir(submodule_path) do
      submodule_relative_path = submodule_path.sub(gem_dir, '')
      # Prepend the submodule path to build paths relative to the gem root.
      `git ls-files`.split("\n").each do |filename|
        s.files << "#{submodule_relative_path}/#{filename}"
      end
    end
  end

  # The C++ extension is compiled at install time via extconf.rb.
  s.require_paths = %w[lib ext]
  s.extensions    = ['ext/robotstxt/extconf.rb']

  s.add_development_dependency 'bundler', '~> 2.0'
  s.add_development_dependency 'rake', '~> 10.0'
  s.add_development_dependency 'rake-compiler', '~> 1.0'

  s.add_development_dependency 'guard-rspec', '~> 4.7'
  s.add_development_dependency 'rspec', '~> 3.0'
end
@@ -0,0 +1,6 @@
1
# frozen_string_literal: true

# Version information for the google_robotstxt_parser gem.
module GoogleRobotstxtParser
  # Release version of this gem.
  VERSION = '0.0.3'

  # Major version of the vendored Google robotstxt C++ library.
  GOOGLE_ROBOTSTXT_MAJOR_VERSION = 1
end
@@ -0,0 +1,4 @@
1
# frozen_string_literal: true

# Top-level namespace for the google_robotstxt_parser gem. The actual
# robots.txt matching lives in the compiled `Robotstxt` C++ extension.
module GoogleRobotstxtParser
end
@@ -0,0 +1,33 @@
1
# frozen_string_literal: true

require 'robotstxt'

RSpec.describe GoogleRobotstxtParser do
  # NOTE: removed the unused `let(:context) { GoogleRobotstxtParser.create }`
  # (no such .create method exists) and the unnecessary `include Robotstxt` —
  # the specs call Robotstxt.allowed_by_robots as a module function.

  describe '.VERSION' do
    it 'returns a string' do
      expect(GoogleRobotstxtParser::VERSION).to be_instance_of String
    end
  end

  describe 'allowed_by_robots' do
    it 'returns true when the robots.txt allows the URL' do
      robotstxt_content = "# robotstxt.org/\n\nUser-agent: *\nDisallow: \n\nSitemap: https://www.bqst.fr/sitemap.xml"
      user_agent = 'GoogleBot'
      url = 'https://www.bqst.fr'

      expect(Robotstxt.allowed_by_robots(robotstxt_content, user_agent, url)).to eq(true)
    end

    it 'returns false when the robots.txt disallows the URL' do
      robotstxt_content = "# robotstxt.org/\n\nUser-agent: *\nDisallow: /\n\nSitemap: https://www.bqst.fr/sitemap.xml"
      user_agent = 'GoogleBot'
      url = 'https://www.bqst.fr'

      expect(Robotstxt.allowed_by_robots(robotstxt_content, user_agent, url)).to eq(false)
    end
  end
end
@@ -0,0 +1,19 @@
1
# frozen_string_literal: true

require 'simplecov'
SimpleCov.start

require 'bundler/setup'
# Was `require 'sound_io'` — a copy-paste leftover from an unrelated gem's
# spec_helper. Load this gem instead so the specs exercise the right code.
require 'google_robotstxt_parser'

RSpec.configure do |config|
  # Enable flags like --only-failures and --next-failure
  config.example_status_persistence_file_path = '.rspec_status'

  # Disable RSpec exposing methods globally on `Module` and `main`
  config.disable_monkey_patching!

  config.expect_with :rspec do |c|
    c.syntax = :expect
  end
end
metadata ADDED
@@ -0,0 +1,146 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: google_robotstxt_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Bastien Montois
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-11-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard-rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '4.7'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '4.7'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.0'
83
+ description: This is an unofficial Ruby gem that provides a wrapper around [Google
84
+ Robotstxt Parser C++ library]
85
+ email: bastien.montois@la-revanche-des-sites.fr
86
+ executables: []
87
+ extensions:
88
+ - ext/robotstxt/extconf.rb
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".gitignore"
92
+ - ".gitmodules"
93
+ - CHANGELOG.md
94
+ - CODE_OF_CONDUCT.md
95
+ - Gemfile
96
+ - Guardfile
97
+ - LICENSE
98
+ - README.md
99
+ - Rakefile
100
+ - ext/robotstxt/.DS_Store
101
+ - ext/robotstxt/extconf.rb
102
+ - ext/robotstxt/robotstxt.cc
103
+ - ext/robotstxt/robotstxt/.gitignore
104
+ - ext/robotstxt/robotstxt/BUILD
105
+ - ext/robotstxt/robotstxt/CMakeLists.txt
106
+ - ext/robotstxt/robotstxt/CMakeLists.txt.in
107
+ - ext/robotstxt/robotstxt/CONTRIBUTING.md
108
+ - ext/robotstxt/robotstxt/LICENSE
109
+ - ext/robotstxt/robotstxt/README.md
110
+ - ext/robotstxt/robotstxt/WORKSPACE
111
+ - ext/robotstxt/robotstxt/protocol-draft/README.md
112
+ - ext/robotstxt/robotstxt/protocol-draft/draft-koster-rep-00.txt
113
+ - ext/robotstxt/robotstxt/robots.cc
114
+ - ext/robotstxt/robotstxt/robots.h
115
+ - ext/robotstxt/robotstxt/robots_main.cc
116
+ - ext/robotstxt/robotstxt/robots_test.cc
117
+ - google_robotstxt_parser.gemspec
118
+ - lib/google_robotstxt_parser.rb
119
+ - lib/google_robotstxt_parser/version.rb
120
+ - spec/google_robotstxt_parser_spec.rb
121
+ - spec/spec_helper.rb
122
+ homepage: https://github.com/larevanchedessites/google-robotstxt-ruby
123
+ licenses:
124
+ - MIT
125
+ metadata: {}
126
+ post_install_message:
127
+ rdoc_options: []
128
+ require_paths:
129
+ - lib
130
+ - ext
131
+ required_ruby_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ required_rubygems_version: !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ version: '0'
141
+ requirements: []
142
+ rubygems_version: 3.0.4
143
+ signing_key:
144
+ specification_version: 4
145
+ summary: Ruby gem wrapper around Google Robotstxt Parser library
146
+ test_files: []