gort 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e05f323b6f9699a6f39f7042d494e18856bd954b974cf6943d0a0fa6bdb1f263
4
+ data.tar.gz: 396813a5d7c484e5408603c17bb76d79ec4debce48edec8a6b8ecf8eeb0231da
5
+ SHA512:
6
+ metadata.gz: 9978aba28e9e77ea750cf61de9a22137706fc578fbe0bfbc348d9c97b4d516ea3b6a2e11250f115a2eeae8fea9752c48dc5803556fd3e9a4c0dcfc1e4e9a02e0
7
+ data.tar.gz: cae8fc644d21a8d31da01b2390f801ceb06b0fc857d1c3d900a62496ff111f41636e09316c86520ab829add7248dcfb1d2a818eea0dd04b030429f8734388be8
checksums.yaml.gz.sig ADDED
@@ -0,0 +1,2 @@
1
+ o�@(�{@�B�>I��(�S�S� AĤE*a�i��B�M��܄}C;�G�@�M�0��^�x!���r���;��7�܄�闶������_�)��w�Ⱦ�a�ŴR�:�{�Y@r���˾��m>�Ի{"~� �O��@I+�#S
2
+ �r��8p���B�5�h�%��-(�c�N��ϼDn3�} �\I�.~�}���2��I���<�6��ʻ�V�2K��&�[+���^|V6Ƹ���ͼ�
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2024 Alexander Mankuta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "path_rule"
4
+
5
module Gort
  # An +Allow+ directive: a path rule whose name is always +:allow+.
  class AllowRule < PathRule
    # @param value [String] the path pattern to allow.
    def initialize(value) = super(:allow, value)
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "path_rule"
4
+
5
module Gort
  # A +Disallow+ directive: a path rule whose name is always +:disallow+.
  class DisallowRule < PathRule
    # @param value [String] the path pattern to disallow.
    def initialize(value) = super(:disallow, value)
  end
end
data/lib/gort/group.rb ADDED
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "rule_set"
4
+
5
module Gort
  # An access group: a rule set whose user-agent rules decide which
  # user agents the rest of its rules apply to.
  class Group < RuleSet
    # Is this group valid?
    #
    # A valid group has at least one valid user-agent rule.
    #
    # @return [Boolean]
    # @see UserAgentRule#valid?
    def valid?
      # `||=` would recompute the answer on every call whenever it is false.
      return @valid if defined?(@valid)

      @valid = rules.any? { |rule| valid_user_agent_rule?(rule) }
    end

    # Does this group apply to this specific user agent?
    #
    # The group applies when it contains a valid `*` user-agent rule, or when
    # any of its valid user-agent tokens occurs (case-insensitively) anywhere
    # in +user_agent+. A group without any valid user-agent rule applies to
    # nobody.
    #
    # @param user_agent [String]
    # @return [Boolean]
    def apply?(user_agent)
      apply_to_all? || user_agent.match?(user_agent_regexp)
    end

    # @!group Formatting Methods

    # A human readable representation of the group.
    #
    # @return [String]
    # @tool
    # :nocov:
    def inspect
      "#<#{self.class.name}:#{object_id} #{rules.inspect}>"
    end
    # :nocov:

    # Produces a pretty human readable representation of the group.
    #
    # @param pp [PrettyPrint] pretty printer
    # @return [void]
    # @tool
    # :nocov:
    def pretty_print(pp)
      pp.text("#{self.class.name}/#{object_id}")
      pp.group(1, "[", "]") do
        pp.breakable("")
        pp.seplist(rules) do |rule|
          pp.pp(rule)
        end
        pp.breakable("")
      end
    end
    # :nocov:

    # @!endgroup Formatting Methods

    private

    # Is the given entry a user-agent rule with a valid value?
    #
    # @param rule [Object] any entry of {#rules}
    # @return [Boolean]
    def valid_user_agent_rule?(rule)
      rule.is_a?(UserAgentRule) && rule.valid?
    end

    # Does this group apply to all user agents?
    #
    # Effectively, does this group contain a valid `*` user-agent rule.
    #
    # @return [Boolean]
    def apply_to_all?
      return @apply_to_all if defined?(@apply_to_all)

      @apply_to_all = rules.any? { |rule| valid_user_agent_rule?(rule) && rule.value == "*" }
    end

    # A compiled Regexp that matches all specific (non-`*`) user agents in this group.
    #
    # @return [Regexp]
    def user_agent_regexp
      @user_agent_regexp ||=
        begin
          tokens = rules.filter_map { |rule|
            rule.value if valid_user_agent_rule?(rule) && rule.value != "*"
          }
          # Regexp.union escapes each token. For an empty list it yields a
          # pattern that never matches, whereas joining zero alternatives would
          # produce an empty pattern that matches every user agent.
          Regexp.new(Regexp.union(tokens).source, Regexp::IGNORECASE)
        end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module Gort
  # Represents an invalid line in a robots.txt file.
  #
  # @note Technically, the RFC doesn't have invalid lines in its grammar
  #   but there are just too many broken robots.txt files on the internet.
  #
  # An invalid line is a line that cannot be parsed as a rule and is not a comment.
  class InvalidLine
    # Content of the line.
    # @return [String]
    attr_reader :value

    # @param text [String] content of the line
    def initialize(text)
      @value = text
    end

    # @!group Formatting Methods

    # A human readable representation of the invalid line.
    #
    # @return [String]
    # @tool
    # :nocov:
    def inspect
      format(%(#<%s:%d "%s">), self.class.name, object_id, value)
    end
    # :nocov:

    # Produces a pretty human readable representation of the invalid line.
    #
    # @param pp [PrettyPrint] pretty printer
    # @return [void]
    # @tool
    # :nocov:
    def pretty_print(pp)
      pp.text(format("%s/%d< %s >", self.class.name, object_id, value))
    end
    # :nocov:

    # @!endgroup Formatting Methods
  end
end
@@ -0,0 +1,144 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "invalid_line"
4
+ require_relative "rule"
5
+ require_relative "user_agent_rule"
6
+ require_relative "allow_rule"
7
+ require_relative "disallow_rule"
8
+ require_relative "rule_set"
9
+ require_relative "group"
10
+ require_relative "robots_txt"
11
+
12
module Gort
  # robots.txt parser. It implements the parsing logic according to RFC 9309, including errata.
  class Parser
    # You may get this error if the input does not look like a text file.
    class BinaryInputError < Error; end

    # You may get this error if the input looks like a text file but its encoding is invalid.
    class InvalidEncodingError < Error; end

    UTF_8_BOM = "\ufeff"
    private_constant :UTF_8_BOM

    # RFC does not explicitly define the generic rule name syntax. It only defines that it has to be case-insensitive.
    # It also provides a few pre-defined rule names such as User-Agent, Allow, and Disallow.
    # Things that might be different from the RFC intention:
    # - The rule name must start with a letter. RFC might allow other characters.
    # - The rule name might contain underscores. RFC doesn't mention underscores.
    # - The rule name might contain digits. RFC doesn't mention digits, either.
    #
    # This is only used for plausible rule detection.
    RULE_KEY = /\A[a-z][a-z0-9_-]*\s*:/i
    private_constant :RULE_KEY

    # @param input [String] The robots.txt content to parse. It must be encoded in UTF-8 or compatible encoding.
    # @raise [BinaryInputError] if the encoding has to be guessed and the input does not look like text.
    # @raise [InvalidEncodingError] if the input cannot be converted to valid UTF-8.
    def initialize(input)
      @input = detect_and_fix_encoding(input).then { |string| strip_bom(string) }
    end

    # Actually parse the file.
    #
    # Comments (everything from the first `#` on a line) and blank lines are
    # dropped before the remaining lines are turned into rules.
    #
    # @return [Gort::RobotsTxt]
    def parse
      rules =
        input.each_line.filter_map { |line|
          content = line.partition("#").first.strip
          parse_line(content) unless content.empty?
        }
      grouped_rules, standalone_rules = partition_rules(rules)

      RobotsTxt.new(group_rules(grouped_rules) + standalone_rules)
    end

    private

    # @return [String]
    attr_reader :input

    # Convert the input to valid UTF-8, guessing the source encoding if needed.
    #
    # @param string [String]
    # @return [String]
    # @raise [BinaryInputError]
    # @raise [InvalidEncodingError]
    def detect_and_fix_encoding(string)
      transcode_to_utf8(string) || guess_encoding_and_transcode(string)
    end

    # Transcode using the encoding the string is tagged with.
    #
    # @param string [String]
    # @return [String, nil] valid UTF-8, or +nil+ when the declared encoding does not work.
    def transcode_to_utf8(string)
      utf8 = string.encode(Encoding::UTF_8)
      # Encoding a UTF-8 string to UTF-8 is a no-op that does not look at the
      # bytes, so broken input has to be detected explicitly.
      utf8 if utf8.valid_encoding?
    rescue EncodingError
      nil
    end

    # Guess the encoding with rchardet and transcode from it.
    #
    # @param string [String]
    # @return [String]
    # @raise [BinaryInputError] if no encoding is detected or the confidence is below 0.25.
    # @raise [InvalidEncodingError] if the bytes cannot be converted from the detected encoding.
    def guess_encoding_and_transcode(string)
      require "rchardet" # loaded lazily: only needed for input that is not already valid UTF-8
      result = CharDet.detect(string)
      raise BinaryInputError, "Input does not look like text" if result["encoding"].nil? || result["confidence"] < 0.25

      utf8 =
        begin
          string
            .dup
            .force_encoding(result["encoding"])
            .encode(Encoding::UTF_8)
        rescue EncodingError, ArgumentError
          # ArgumentError: the detected encoding name is not known to Ruby.
          raise InvalidEncodingError, "Input string looks like text but its encoding is invalid."
        end
      # When the detected encoding is UTF-8 the conversion above is a no-op again.
      raise InvalidEncodingError, "Input string looks like text but its encoding is invalid." unless utf8.valid_encoding?

      utf8
    end

    # Drop a leading byte order mark, if there is one.
    #
    # @param string [String]
    # @return [String]
    def strip_bom(string)
      string.delete_prefix(UTF_8_BOM)
    end

    # Turn one non-empty, comment-free line into a rule object.
    #
    # @param line [String]
    # @return [UserAgentRule, AllowRule, DisallowRule, Rule, InvalidLine]
    def parse_line(line)
      return InvalidLine.new(line) unless line.match?(RULE_KEY)

      # @type var key: String
      # @type var value: String
      key, value = line.split(":", 2).map(&:strip)
      case key.downcase
      when "user-agent"
        UserAgentRule.new(value)
      when "allow"
        AllowRule.new(value)
      when "disallow"
        DisallowRule.new(value)
      else
        Rule.new(key, value)
      end
    end

    # Separate the rules that belong to groups from the ones that stand alone.
    #
    # User-agent rules are always grouped. Allow/disallow rules are grouped
    # only once a user-agent rule has been seen. Everything else stands alone.
    #
    # @param rules [Array<UserAgentRule, AllowRule, DisallowRule, Rule, InvalidLine>]
    # @return [(Array<UserAgentRule, AllowRule, DisallowRule>, Array<AllowRule, DisallowRule, Rule, InvalidLine>)]
    def partition_rules(rules)
      standalone_rules = []
      grouped_rules = []
      rules.each do |rule|
        case rule
        when UserAgentRule
          grouped_rules << rule
        when AllowRule, DisallowRule
          (grouped_rules.empty? ? standalone_rules : grouped_rules) << rule
        else
          standalone_rules << rule
        end
      end

      [grouped_rules, standalone_rules]
    end

    # Cut the rule list into groups; a new group starts at every user-agent
    # rule that follows a non-user-agent rule.
    #
    # @param rules [Array<UserAgentRule, AllowRule, DisallowRule>]
    # @return [Array<Group>]
    def group_rules(rules)
      rules
        .slice_when { |a, b| !a.is_a?(UserAgentRule) && b.is_a?(UserAgentRule) }
        .map { |group| Group.new(group) }
    end
  end
end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "rule"
4
+ require "addressable/uri"
5
+
6
module Gort
  # A rule that matches a path and query string.
  #
  # @abstract
  class PathRule < Rule
    # A path pattern has to start with a slash or an asterisk and must not
    # contain control characters, spaces, or a hash; a dollar sign is only
    # accepted as the last character.
    # It also has to be a valid UTF-8 string but this is checked during parsing.
    # It also can be empty.
    PATH_PATTERN = %r{\A(?:[/*][^\u0000-\u0020\u0023$]*\$?)?\z}u
    private_constant :PATH_PATTERN

    # Is the rule value a well-formed (possibly empty) path pattern?
    #
    # @return [Boolean]
    def valid?
      value.match?(PATH_PATTERN)
    end

    # Match the path and query string against the rule.
    # Invalid rules never match.
    # Empty rules never match, either. This is not explicitly stated in the RFC
    # but it is explicitly described in previous robots.txt documents.
    #
    # @param path_and_query [String]
    # @return [nil, (Integer, PathRule)]
    #   - +nil+ if the rule does not match the path and query string.
    #   - An array with the number of bytes matched and the rule itself if the rule matches.
    def match(path_and_query)
      return unless valid? && !value.empty?

      matched = normalize_path_and_query(path_and_query).match(regexp)
      matched && [matched.to_s.bytesize, self]
    end

    # @!group Formatting Methods

    # A human readable representation of the rule.
    #
    # @return [String]
    # @tool
    # :nocov:
    def inspect
      format(%(#<%s:%d "%s">), self.class.name, object_id, value)
    end
    # :nocov:

    # Produces a pretty human readable representation of the rule.
    #
    # @param pp [PrettyPrint] pretty printer
    # @return [void]
    # @tool
    # :nocov:
    def pretty_print(pp)
      pp.text(format("%s/%d< %s >", self.class.name, object_id, value))
    end
    # :nocov:

    # @!endgroup Formatting Methods

    private

    # Normalize the input as a URI and keep everything except the scheme,
    # authority, and fragment.
    #
    # @param path_and_query [String]
    # @return [String]
    def normalize_path_and_query(path_and_query)
      uri = Addressable::URI.parse(path_and_query).normalize
      uri.scheme = nil
      uri.authority = nil
      uri.fragment = nil
      uri.to_s
    end

    # The rule value compiled to a Regexp anchored at the start of the string.
    #
    # @return [Regexp]
    def regexp
      @regexp ||=
        begin
          fragments = value.scan(/[^*$]+|[*$]/).map { |token| regexp_fragment(token) }
          Regexp.new("\\A#{fragments.join}")
        end
    end

    # Regexp source for one token of the rule value: `*` is a wildcard, `$`
    # anchors the end of the string, and anything else is matched literally
    # after URI-encoding normalization.
    #
    # @param token [String]
    # @return [String]
    def regexp_fragment(token)
      if token == "*"
        ".*"
      elsif token == "$"
        "\\z"
      else
        Regexp.escape(Addressable::URI.normalized_encode(token))
      end
    end
  end
end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
module Gort
  # Represents a robots.txt file.
  class RobotsTxt
    ROBOTS_TXT_PATH = "/robots.txt"
    private_constant :ROBOTS_TXT_PATH

    # @param rules [Array<Rule, Group, InvalidLine>] the parsed content of the file
    def initialize(rules)
      @rules = rules
    end

    # @return [Array<Rule, Group, InvalidLine>]
    attr_reader :rules

    # Is this path allowed for the given user agent?
    #
    # +/robots.txt+ itself is always allowed. Otherwise the longest match
    # among the applicable groups decides, and an allow rule wins a tie.
    # No match at all means the path is allowed.
    #
    # @param user_agent [String]
    # @param path_and_query [String]
    # @return [Boolean]
    # @see PathRule#match
    # @see #disallow?
    def allow?(user_agent, path_and_query)
      return true if path_and_query == ROBOTS_TXT_PATH

      top_match =
        matches(user_agent, path_and_query)
          # Longest match first; on equal length an allow rule goes before
          # anything else. The rank is explicit rather than derived from the
          # class name so that it also works for (anonymous) subclasses.
          .min_by { |(match_length, rule)| [-match_length, rule.is_a?(AllowRule) ? 0 : 1] }

      # Allow if there is no match or the top match is an allow rule.
      top_match.nil? || top_match.last.is_a?(AllowRule)
    end

    # Is this path disallowed for the given user agent?
    #
    # @param user_agent [String]
    # @param path_and_query [String]
    # @return [Boolean]
    # @see PathRule#match
    # @see #allow?
    def disallow?(user_agent, path_and_query)
      !allow?(user_agent, path_and_query)
    end

    # @!group Formatting Methods

    # A human readable representation of the robots.txt.
    #
    # @return [String]
    # @tool
    # :nocov:
    def inspect
      "#<#{self.class.name}:#{object_id} #{rules.inspect}>"
    end
    # :nocov:

    # Produces a pretty human readable representation of the robots.txt.
    #
    # @param pp [PrettyPrint] pretty printer
    # @return [void]
    # @tool
    # :nocov:
    def pretty_print(pp)
      pp.text("#{self.class.name}/#{object_id}")
      pp.group(1, "[", "]") do
        pp.breakable("")
        pp.seplist(rules) do |rule|
          pp.pp(rule)
        end
        pp.breakable("")
      end
    end
    # :nocov:

    # @!endgroup Formatting Methods

    private

    # Collect every path-rule match from the valid groups that apply to the user agent.
    #
    # @param user_agent [String]
    # @param path [String]
    # @return [Array<(Integer, PathRule)>] match length and matching rule pairs
    def matches(user_agent, path)
      # @type var groups: Array<Group>
      groups = rules.select { |rule| rule.is_a?(Group) && rule.valid? && rule.apply?(user_agent) }
      groups.flat_map do |group|
        # filter_map already drops the nil results of non-matching rules.
        group.rules.filter_map do |rule|
          next unless rule.is_a?(PathRule)

          rule.match(path)
        end
      end
    end
  end
end
data/lib/gort/rule.rb ADDED
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
module Gort
  # Generic rule.
  # This represents an entry that looks like a valid rule but otherwise doesn't
  # have a more specialized implementation.
  class Rule
    # The name of the rule, lower-cased.
    # @return [Symbol]
    attr_reader :name

    # The value of the rule.
    # @return [String]
    attr_reader :value

    # @param name [String, Symbol] The name of the rule; it is stored as a lower-case symbol.
    # @param value [String] The value of the rule.
    def initialize(name, value)
      @name = name.downcase.to_sym
      @value = value
    end

    # @!group Formatting Methods

    # A human readable representation of the rule.
    #
    # @return [String]
    # @tool
    # :nocov:
    def inspect
      format(%(#<%s:%d "%s", "%s">), self.class.name, object_id, name, value)
    end
    # :nocov:

    # Produces a pretty human readable representation of the rule.
    #
    # @param pp [PrettyPrint] pretty printer
    # @return [void]
    # @tool
    # :nocov:
    def pretty_print(pp)
      pp.text(format("%s/%d< %p, %s >", self.class.name, object_id, name, value))
    end
    # :nocov:

    # @!endgroup Formatting Methods
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Gort
  # Abstract rule set.
  #
  # @abstract
  class RuleSet
    # Rules in this set. The array is frozen.
    # @return [Array<Group, UserAgentRule, AllowRule, DisallowRule, Rule, InvalidLine>]
    attr_reader :rules

    # @param rules [Array<Group, UserAgentRule, AllowRule, DisallowRule, Rule, InvalidLine>]
    #   The rules. Or invalid lines. Nested arrays are flattened.
    def initialize(*rules)
      @rules = rules.flatten.freeze
    end

    # Make a new set by merging this one with another.
    # The result is an instance of the receiver's class.
    #
    # @param other [RuleSet]
    # @return [RuleSet]
    def merge(other)
      self.class.new([*rules, *other.rules])
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "rule"
4
+
5
module Gort
  # User-agent rule.
  class UserAgentRule < Rule
    # Either a run of letters, underscores and hyphens, or a lone asterisk.
    PRODUCT_TOKEN_RE = /\A([a-z_-]+|\*)\z/i
    private_constant :PRODUCT_TOKEN_RE

    # @param value [String] the product token, or `*`.
    def initialize(value)
      super(:"user-agent", value)
    end

    # Returns +true+ if the value is a valid user agent.
    #
    # A user agent token is a non-empty sequence of letters (a—z, A—Z),
    # underscores (_), or hyphens (-). Alternatively, a single asterisk (*)
    # is also allowed. Digits are not accepted.
    #
    # @return [Boolean]
    #   - +true+ if the value is a valid product token
    #   - +false+ otherwise
    def valid?
      value.match?(PRODUCT_TOKEN_RE)
    end

    # @!group Formatting Methods

    # A human readable representation of the rule.
    #
    # @return [String]
    # @tool
    # :nocov:
    def inspect
      %(#<#{self.class.name}:#{object_id} "#{value}">)
    end
    # :nocov:

    # Produces a pretty human readable representation of the rule.
    #
    # @param pp [PrettyPrint] pretty printer
    # @return [void]
    # @tool
    # :nocov:
    def pretty_print(pp)
      pp.text("#{self.class.name}/#{object_id}< #{value} >")
    end
    # :nocov:

    # @!endgroup Formatting Methods
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Gort
4
+ # Gem version
5
+ VERSION = "0.1.0"
6
+ end
data/lib/gort.rb ADDED
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "gort/version"
4
+
5
+ # Gort is a robots.txt parser and evaluator.
6
module Gort
  # Gort's top error class. All other errors inherit from this.
  class Error < StandardError; end

  class << self
    # Parse the given robots.txt input and return a RobotsTxt instance.
    #
    # @param input [String] the robots.txt input to parse
    # @return [RobotsTxt] the parsed robots.txt
    def parse(input)
      parser = Parser.new(input)
      parser.parse
    end
  end
end
18
+
19
+ require_relative "gort/parser"
data.tar.gz.sig ADDED
Binary file
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gort
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Alexander Mankuta
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIC+jCCAeKgAwIBAgIBAzANBgkqhkiG9w0BAQsFADAjMSEwHwYDVQQDDBhhbGV4
14
+ L0RDPXBvaW50bGVzcy9EQz1vbmUwHhcNMjMxMTA5MTA1MzIxWhcNMjQxMTA4MTA1
15
+ MzIxWjAjMSEwHwYDVQQDDBhhbGV4L0RDPXBvaW50bGVzcy9EQz1vbmUwggEiMA0G
16
+ CSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDPOVLPGEK+eaP6zJfifrpWvPTg4qo3
17
+ XNJJPom80SwqX2hVCVsRDK4RYgKUQqKRQzHhlx14wZHwWLETBVbNDGX3uqyCnTWU
18
+ JUKh3ydiZShXpNHoV/NW7hhEYvNsDcBAjYTmbvXOhuYCo0Tz/0N2Oiun/0wIICtP
19
+ vytY9TY0/lklWjAbsqJjNOu3o8IYkJBAN/rU96E/6WhFwjnxLcTnV9RfFRXdjG5j
20
+ CughoB2xSwKX8gwbQ8fsnaZRmdyDGYNpz6sGF0zycfiLkTttbLA2nYATCALy98CH
21
+ nsyZNsTjb4WINCuY2yEDjwesw9f/ROkNC68EgQ5M+aMjp+D0WcYGfzojAgMBAAGj
22
+ OTA3MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgSwMB0GA1UdDgQWBBRPgIwSVbeonua/
23
+ Ny/8576oxdUbrjANBgkqhkiG9w0BAQsFAAOCAQEAX28QLxNNz5EgaZZuQQUkbOXB
24
+ 4b5luBO22535+Vgj2jw7yjV8KKoGMWKrnB00ijgntqPEPXCzaPNibOcPZV5WfWVS
25
+ t0Ls8lWE/8kezPwV4SbRe4Y7C+D4J+oirs0L5PtpREV9CJ7kfdW/AN9MtvjjBFlb
26
+ jHquD/MiOOMyHtuO0FiTL265m10thcAUsbyi0MehKgGbtJ5fGceHvZDqDouvbMjT
27
+ hoijFk1oTY939JhjdcHuJzMiS2TrqIw8Dr5DkQu2vAjHpw0aOOWhlRjNJ7RHYJNm
28
+ QugXmCnHQxSKTmc7imKuotyMdRRKFh8UEFCLRsFtBbNxkXyNuB4xBMuUYodhEw==
29
+ -----END CERTIFICATE-----
30
+ date: 2024-06-22 00:00:00.000000000 Z
31
+ dependencies:
32
+ - !ruby/object:Gem::Dependency
33
+ name: addressable
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - "~>"
37
+ - !ruby/object:Gem::Version
38
+ version: '2.8'
39
+ type: :runtime
40
+ prerelease: false
41
+ version_requirements: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - "~>"
44
+ - !ruby/object:Gem::Version
45
+ version: '2.8'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rchardet
48
+ requirement: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - "~>"
51
+ - !ruby/object:Gem::Version
52
+ version: '1.8'
53
+ type: :runtime
54
+ prerelease: false
55
+ version_requirements: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '1.8'
60
+ description: robots.txt parser and evaluator according to RFC 9309.
61
+ email:
62
+ - alex@pointless.one
63
+ executables: []
64
+ extensions: []
65
+ extra_rdoc_files: []
66
+ files:
67
+ - LICENSE.txt
68
+ - lib/gort.rb
69
+ - lib/gort/allow_rule.rb
70
+ - lib/gort/disallow_rule.rb
71
+ - lib/gort/group.rb
72
+ - lib/gort/invalid_line.rb
73
+ - lib/gort/parser.rb
74
+ - lib/gort/path_rule.rb
75
+ - lib/gort/robots_txt.rb
76
+ - lib/gort/rule.rb
77
+ - lib/gort/rule_set.rb
78
+ - lib/gort/user_agent_rule.rb
79
+ - lib/gort/version.rb
80
+ homepage:
81
+ licenses:
82
+ - MIT
83
+ metadata:
84
+ allowed_push_host: https://rubygems.org
85
+ rubygems_mfa_required: 'true'
86
+ post_install_message:
87
+ rdoc_options: []
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '3.1'
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ requirements: []
101
+ rubygems_version: 3.5.9
102
+ signing_key:
103
+ specification_version: 4
104
+ summary: robots.txt parser and evaluator.
105
+ test_files: []
metadata.gz.sig ADDED
@@ -0,0 +1,2 @@
1
+ f�*a��z0G���p��h:P3��H�I�wj�n�394��?z˩ޚ���H��p1��.������\0h&GP��� &I2ߪ��Ȕ˒49 �Esp�f�>y�"$���p�J����Tќh�������=���2vZ������]G�.��T�ƹ�0<x�8su�V� nv�yI� �L�B
2
+ �F�������0ς��c��8�c�3��f6�e�JuA��Zg����%��í�� �LE:t�{��