google_robotstxt_parser 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,32 @@
1
+ #include <ruby.h>
2
+ #include "robots.h"
3
+ #include <string>
4
+
5
+ typedef VALUE(ruby_method)(...);
6
+
7
+ extern "C"
8
+ {
9
+ static VALUE m_Robotstxt;
10
+
11
+ static VALUE robotstxt_allowed_by_robots(VALUE self, VALUE robots_content, VALUE user_agent, VALUE url)
12
+ {
13
+ Check_Type(robots_content, T_STRING);
14
+ Check_Type(user_agent, T_STRING);
15
+ Check_Type(url, T_STRING);
16
+
17
+ std::string rc = RSTRING_PTR(robots_content);
18
+ std::string ua = RSTRING_PTR(user_agent);
19
+ std::string ur = RSTRING_PTR(url);
20
+
21
+ googlebot::RobotsMatcher matcher;
22
+ bool result = matcher.OneAgentAllowedByRobots(rc, ua, ur);
23
+ return result ? Qtrue : Qfalse;
24
+ }
25
+
26
+ void Init_robotstxt()
27
+ {
28
+ m_Robotstxt = rb_define_module("Robotstxt");
29
+
30
+ rb_define_method(m_Robotstxt, "allowed_by_robots", (ruby_method *)&robotstxt_allowed_by_robots, 3);
31
+ }
32
+ }
@@ -0,0 +1,45 @@
1
# frozen_string_literal: true

lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'google_robotstxt_parser/version'

Gem::Specification.new do |s|
  s.name        = 'google_robotstxt_parser'
  s.version     = GoogleRobotstxtParser::VERSION
  s.date        = '2019-11-25'
  s.summary     = 'Ruby gem wrapper around Google Robotstxt Parser library'
  s.description = 'This is an unofficial Ruby gem that provides a wrapper around [Google Robotstxt Parser C++ library]'
  s.authors     = ['Bastien Montois']
  s.email       = 'bastien.montois@la-revanche-des-sites.fr'
  s.homepage    = 'https://github.com/larevanchedessites/google-robotstxt-ruby'
  s.license     = 'MIT'

  # Ship everything tracked by git. (A previous Dir['lib/**/*.rb'] assignment
  # was dead code: it was immediately overwritten by this line.)
  s.files = `git ls-files`.split("\n")

  # `git ls-files` in the top-level repo does not descend into submodules, so
  # collect the submodule-tracked files too: run 'pwd' inside each submodule,
  # then list its files and prepend the submodule's path relative to the gem.
  gem_dir = File.expand_path(__dir__) + '/'
  `git submodule --quiet foreach pwd`.split("\n").each do |submodule_path|
    Dir.chdir(submodule_path) do
      submodule_relative_path = submodule_path.sub(gem_dir, '')
      # Split on newlines explicitly: the old split($\) used the *output*
      # record separator (nil by default), which splits on any whitespace and
      # breaks file names containing spaces.
      `git ls-files`.split("\n").each do |filename|
        s.files << "#{submodule_relative_path}/#{filename}"
      end
    end
  end

  s.require_paths = %w[lib ext]
  s.extensions    = ['ext/robotstxt/extconf.rb']

  s.add_development_dependency 'bundler', '~> 2.0'
  s.add_development_dependency 'rake', '~> 10.0'
  s.add_development_dependency 'rake-compiler', '~> 1.0'

  s.add_development_dependency 'guard-rspec', '~> 4.7'
  s.add_development_dependency 'rspec', '~> 3.0'
end
@@ -0,0 +1,6 @@
1
# frozen_string_literal: true

# Version information for the google_robotstxt_parser gem.
module GoogleRobotstxtParser
  # Major version of the vendored Google robotstxt C++ library submodule.
  GOOGLE_ROBOTSTXT_MAJOR_VERSION = 1

  # Release version of this gem.
  VERSION = '0.0.3'
end
@@ -0,0 +1,4 @@
1
# frozen_string_literal: true

# Top-level namespace for the gem. Functionality is provided by the compiled
# C extension, which defines the Robotstxt module.
module GoogleRobotstxtParser
end
@@ -0,0 +1,33 @@
1
# frozen_string_literal: true

require 'robotstxt'

RSpec.describe GoogleRobotstxtParser do
  include Robotstxt

  # NOTE(review): a previous `let(:context) { GoogleRobotstxtParser.create }`
  # was removed — `.create` is not defined anywhere in the gem and the let was
  # never referenced, so it only masked a latent NoMethodError.

  describe '.VERSION' do
    it 'returns a string' do
      expect(GoogleRobotstxtParser::VERSION).to be_instance_of String
    end
  end

  describe 'allowed_by_robots' do
    # Descriptions were previously the duplicated 'should return the true'
    # for both examples, even though the second asserts false.
    it 'returns true when the URL is not disallowed' do
      robotstxt_content = "# robotstxt.org/\n\nUser-agent: *\nDisallow: \n\nSitemap: https://www.bqst.fr/sitemap.xml"
      user_agent = 'GoogleBot'
      url = 'https://www.bqst.fr'

      expect(Robotstxt.allowed_by_robots(robotstxt_content, user_agent, url)).to eq(true)
    end

    it 'returns false when the URL is disallowed' do
      robotstxt_content = "# robotstxt.org/\n\nUser-agent: *\nDisallow: /\n\nSitemap: https://www.bqst.fr/sitemap.xml"
      user_agent = 'GoogleBot'
      url = 'https://www.bqst.fr'

      expect(Robotstxt.allowed_by_robots(robotstxt_content, user_agent, url)).to eq(false)
    end
  end
end
@@ -0,0 +1,19 @@
1
# frozen_string_literal: true

# Start coverage tracking before the code under test is loaded.
require 'simplecov'
SimpleCov.start

require 'bundler/setup'
# Load the gem under test. The previous `require 'sound_io'` was a
# copy-paste leftover from an unrelated gem's template and meant the helper
# never loaded this library at all.
require 'google_robotstxt_parser'

RSpec.configure do |config|
  # Enable flags like --only-failures and --next-failure
  config.example_status_persistence_file_path = '.rspec_status'

  # Disable RSpec exposing methods globally on `Module` and `main`
  config.disable_monkey_patching!

  config.expect_with :rspec do |c|
    c.syntax = :expect
  end
end
metadata ADDED
@@ -0,0 +1,146 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: google_robotstxt_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Bastien Montois
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-11-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard-rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '4.7'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '4.7'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.0'
83
+ description: This is a unofficial Ruby gem that provides a wrapper around [Google
84
+ Robotstxt Parser C++ library]
85
+ email: bastien.montois@la-revanche-des-sites.fr
86
+ executables: []
87
+ extensions:
88
+ - ext/robotstxt/extconf.rb
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".gitignore"
92
+ - ".gitmodules"
93
+ - CHANGELOG.md
94
+ - CODE_OF_CONDUCT.md
95
+ - Gemfile
96
+ - Guardfile
97
+ - LICENSE
98
+ - README.md
99
+ - Rakefile
100
+ - ext/robotstxt/.DS_Store
101
+ - ext/robotstxt/extconf.rb
102
+ - ext/robotstxt/robotstxt.cc
103
+ - ext/robotstxt/robotstxt/.gitignore
104
+ - ext/robotstxt/robotstxt/BUILD
105
+ - ext/robotstxt/robotstxt/CMakeLists.txt
106
+ - ext/robotstxt/robotstxt/CMakeLists.txt.in
107
+ - ext/robotstxt/robotstxt/CONTRIBUTING.md
108
+ - ext/robotstxt/robotstxt/LICENSE
109
+ - ext/robotstxt/robotstxt/README.md
110
+ - ext/robotstxt/robotstxt/WORKSPACE
111
+ - ext/robotstxt/robotstxt/protocol-draft/README.md
112
+ - ext/robotstxt/robotstxt/protocol-draft/draft-koster-rep-00.txt
113
+ - ext/robotstxt/robotstxt/robots.cc
114
+ - ext/robotstxt/robotstxt/robots.h
115
+ - ext/robotstxt/robotstxt/robots_main.cc
116
+ - ext/robotstxt/robotstxt/robots_test.cc
117
+ - google_robotstxt_parser.gemspec
118
+ - lib/google_robotstxt_parser.rb
119
+ - lib/google_robotstxt_parser/version.rb
120
+ - spec/google_robotstxt_parser_spec.rb
121
+ - spec/spec_helper.rb
122
+ homepage: https://github.com/larevanchedessites/google-robotstxt-ruby
123
+ licenses:
124
+ - MIT
125
+ metadata: {}
126
+ post_install_message:
127
+ rdoc_options: []
128
+ require_paths:
129
+ - lib
130
+ - ext
131
+ required_ruby_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ required_rubygems_version: !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ version: '0'
141
+ requirements: []
142
+ rubygems_version: 3.0.4
143
+ signing_key:
144
+ specification_version: 4
145
+ summary: Ruby gem wrapper around Google Robotstxt Parser library
146
+ test_files: []