wovnrb 1.0.13 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +1 -1
- data/lib/wovnrb.rb +7 -0
- data/lib/wovnrb/html_replacers/replacer_base.rb +2 -1
- data/lib/wovnrb/html_replacers/unified_values/dst_swapping_targets_creator.rb +76 -0
- data/lib/wovnrb/html_replacers/unified_values/element_category.rb +242 -0
- data/lib/wovnrb/html_replacers/unified_values/node_swapping_targets_creator.rb +134 -0
- data/lib/wovnrb/html_replacers/unified_values/text_replacer.rb +35 -0
- data/lib/wovnrb/html_replacers/unified_values/text_scraper.rb +152 -0
- data/lib/wovnrb/html_replacers/unified_values/values_stack.rb +65 -0
- data/lib/wovnrb/lang.rb +6 -1
- data/lib/wovnrb/services/value_agent.rb +9 -0
- data/lib/wovnrb/store.rb +2 -9
- data/lib/wovnrb/version.rb +1 -1
- data/test/fixtures/unified_values/site_html/simple_actual.html +96 -0
- data/test/fixtures/unified_values/site_html/simple_expected.json +251 -0
- data/test/fixtures/unified_values/site_html/wovn.io_actual.html +686 -0
- data/test/fixtures/unified_values/site_html/wovn.io_expected.json +543 -0
- data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_actual.html +1024 -0
- data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_expected.json +3345 -0
- data/test/fixtures/unified_values/small_html/block_inside_inline_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/block_inside_inline_expected.json +22 -0
- data/test/fixtures/unified_values/small_html/br_tag_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/br_tag_expected.json +12 -0
- data/test/fixtures/unified_values/small_html/comment_tag_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/comment_tag_expected.json +10 -0
- data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_actual.html +7 -0
- data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_expected.json +11 -0
- data/test/fixtures/unified_values/small_html/deep_nested_block_actual.html +14 -0
- data/test/fixtures/unified_values/small_html/deep_nested_block_expected.json +8 -0
- data/test/fixtures/unified_values/small_html/deep_nested_inline_actual.html +20 -0
- data/test/fixtures/unified_values/small_html/deep_nested_inline_expected.json +20 -0
- data/test/fixtures/unified_values/small_html/empty_tag_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/empty_tag_expected.json +12 -0
- data/test/fixtures/unified_values/small_html/empty_text_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/empty_text_expected.json +1 -0
- data/test/fixtures/unified_values/small_html/ignore_tag_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/ignore_tag_expected.json +16 -0
- data/test/fixtures/unified_values/small_html/ignored_class_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/ignored_class_expected.json +13 -0
- data/test/fixtures/unified_values/small_html/img_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/img_expected.json +23 -0
- data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_expected.json +16 -0
- data/test/fixtures/unified_values/small_html/nested_text_value_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/nested_text_value_expected.json +12 -0
- data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_expected.json +14 -0
- data/test/fixtures/unified_values/small_html/option_tag_actual.html +9 -0
- data/test/fixtures/unified_values/small_html/option_tag_expected.json +13 -0
- data/test/fixtures/unified_values/small_html/text_different_inline_each_other_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/text_different_inline_each_other_expected.json +22 -0
- data/test/fixtures/unified_values/small_html/text_in_svg_actual.html +9 -0
- data/test/fixtures/unified_values/small_html/text_in_svg_expected.json +8 -0
- data/test/fixtures/unified_values/small_html/text_with_html_entity_actual.html +6 -0
- data/test/fixtures/unified_values/small_html/text_with_html_entity_expected.json +8 -0
- data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_expected.json +24 -0
- data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_expected.json +14 -0
- data/test/fixtures/unified_values/small_html/wovn_ignore_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/wovn_ignore_expected.json +13 -0
- data/test/lib/html_replacers/unified_values/dst_swapping_targets_creator_test.rb +137 -0
- data/test/lib/html_replacers/unified_values/element_category_test.rb +49 -0
- data/test/lib/html_replacers/unified_values/node_swapping_targets_creator_test.rb +137 -0
- data/test/lib/html_replacers/unified_values/text_replacer_test.rb +270 -0
- data/test/lib/html_replacers/unified_values/text_scraper_test.rb +121 -0
- data/test/lib/html_replacers/unified_values/values_stack_test.rb +122 -0
- data/test/lib/lang_test.rb +59 -1
- data/test/lib/services/value_agent_test.rb +32 -0
- data/test/test_helper.rb +18 -2
- data/wovnrb.gemspec +1 -0
- metadata +134 -7
- data/spec/spec_helper.rb +0 -2
- data/spec/wovnrb_spec.rb +0 -7
@@ -0,0 +1,35 @@
|
|
1
|
+
module Wovnrb
  module UnifiedValues
    # Replacer that swaps the text nodes found by TextScraper with the
    # translations stored in a "unified values" text index.
    class TextReplacer < ReplacerBase
      # @param store passed straight through to ReplacerBase
      # @param text_index translation index handed to DstSwappingTargetsCreator
      def initialize(store, text_index)
        super(store)
        @text_index = text_index
      end

      # Rewrites the text nodes of +dom+ in place for the given language.
      #
      # @param dom the node tree to scrape and translate
      # @param lang object responding to +lang_code+
      # @return the collection produced by NodeSwappingTargetsCreator#run!
      def replace(dom, lang)
        scraped_values = TextScraper.new(@ignored_class_set).run(dom)
        translated_nodes_with_targets = NodeSwappingTargetsCreator.new(scraped_values).run!
        text_index_with_targets = DstSwappingTargetsCreator.new(@text_index).run!

        translated_nodes_with_targets.each do |entry|
          dst_swapping_targets = swapping_targets_for(text_index_with_targets, entry[:dst], lang)
          next unless dst_swapping_targets

          entry[:swapping_targets].each_with_index do |node_swapping_target, index|
            candidate = dst_swapping_targets[index]
            # NOTE: swap-back on the widget side locates the wovn-src comment
            # through the neighbouring text node. An empty translation would
            # leave no text node behind, so the widget could neither find
            # wovn-src nor swap back. A ZERO WIDTH SPACE (`\u200b`) is written
            # instead of an empty string to keep the text node alive.
            translated_text = candidate.blank? ? "\u200b" : candidate
            original_text = node_swapping_target.content

            node_swapping_target.content = translated_text
            add_comment_node(node_swapping_target, original_text)
          end
        end
      end

      private

      # Looks up the 'swapping_targets' of the first translation registered
      # for +dst+ in the requested language; nil when any step is missing.
      def swapping_targets_for(text_index_with_targets, dst, lang)
        text_index_with_targets[dst]
          .try(:fetch, lang.lang_code, nil)
          .try(:first)
          .try(:fetch, 'swapping_targets', nil)
      end
    end
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
module Wovnrb
  module UnifiedValues
    # Walks a node tree and collects translatable "unified values": runs of
    # text together with the inline tags that surround them, cut at every
    # non-inline element boundary.
    class TextScraper
      # @param ignored_class_set collection of CSS class names; an element
      #   carrying any of them is treated like one with a wovn-ignore attribute
      def initialize(ignored_class_set)
        @ignored_class_set = ignored_class_set
        @values = []
        @values_stack = nil
      end

      # Scrapes +dom+ from scratch (previous results are discarded).
      #
      # @param dom root node; must respond to +path+
      # @return [Array<Hash>] values built by #create_text_value
      def run(dom)
        refresh_all!
        @values_stack = ValuesStack.new(dom.path, 1)
        scrape(dom)
      end

      # Builds one scraped value.
      #
      # @param src [String] concatenated source (tags + escaped text)
      # @param node_stack [Array] nodes that contributed to +src+
      # @return [Hash] +{ dst: normalized source, nodes: node_stack }+
      def create_text_value(src, node_stack)
        # NOTE(review): the diff this was taken from rendered the pattern as a
        # bare space, which would be a no-op; the case-insensitive flag implies
        # the literal was the non-breaking-space entity. Confirm against upstream.
        { dst: ValueAgent.normalize_text(src.gsub(/&nbsp;/i, ' ')), nodes: node_stack }
      end

      private

      # Dispatches on the node kind and recurses where appropriate.
      # Always returns the accumulated values so #run can hand them back.
      def scrape(dom)
        case stop_recursion_type(dom)
        when 'ignore_element', 'skip_element'
          flush_and_advance
        when 'text_element'
          @values_stack.add_text_element(dom, dom.content)
        when 'empty_element'
          @values_stack.add(dom, empty_tag(dom))
        when 'comment_element'
          # comments contribute nothing
        when nil
          inline_element?(dom) ? scrape_inline(dom) : scrape_boundary(dom)
        else
          raise 'Unsupported type'
        end

        @values
      end

      # Ignored/skipped element: emit what has been gathered so far and
      # continue on the next stack derived from the current one.
      def flush_and_advance
        following = @values_stack.build_next_stack
        build_src
        @values_stack = following
      end

      # Inline element: its tags become part of the surrounding value.
      def scrape_inline(dom)
        @values_stack.add(dom, start_tag(dom))
        scrape_children_of(dom)
        # Children may have flushed the stack; an end tag on an empty stack
        # would only create a dangling closing tag.
        @values_stack.add(dom, end_tag(dom)) unless @values_stack.blank?
      end

      # Block element or unknown/custom element (both were handled by
      # identical branches): flush the outer value, scrape the children on a
      # fresh stack rooted at this element, flush again, then resume the
      # outer sequence.
      def scrape_boundary(dom)
        following = @values_stack.build_next_stack
        build_src
        @values_stack = ValuesStack.new(dom.path, 1)
        scrape_children_of(dom)
        build_src
        @values_stack = following
      end

      # Recurses into the children unless the element is marked as ignored.
      def scrape_children_of(dom)
        dom.children.each { |c| scrape(c) } unless wovn_ignore_element?(dom)
      end

      # Turns the current stack into a value (unless it is blank or consists
      # of tags only) and clears the stack. Always returns nil.
      def build_src
        return nil if @values_stack.blank?

        src = @values_stack.src
        @values << create_text_value(src, @values_stack.node_stack) unless src.blank? || tag_only?(src)
        @values_stack = nil
      end

      # True when +src+ parses to markup without any visible text.
      def tag_only?(src)
        Nokogiri::HTML5.fragment(src).text.blank?
      end

      # Attribute-free start tag; ignored elements keep a wovn-ignore marker.
      def start_tag(dom)
        wovn_ignore_element?(dom) ? "<#{dom.name} wovn-ignore>" : "<#{dom.name}>"
      end

      def end_tag(dom)
        "</#{dom.name}>"
      end

      # Empty elements are represented by their start tag alone.
      def empty_tag(dom)
        start_tag(dom)
      end

      def block_element?(element)
        ElementCategory::BLOCK_ELEMENTS.include?(element.name)
      end

      def inline_element?(element)
        ElementCategory::INLINE_ELEMENTS.include?(element.name)
      end

      def empty_element?(element)
        ElementCategory::EMPTY_ELEMENTS.include?(element.name)
      end

      def ignore_element?(element)
        ElementCategory::IGNORE_ELEMENTS.include?(element.name)
      end

      def skip_element?(element)
        ElementCategory::SKIP_ELEMENTS.include?(element.name)
      end

      def comment_element?(element)
        element.comment?
      end

      def text_element?(element)
        element.text?
      end

      # True when the element has a wovn-ignore attribute or one of the
      # configured ignored classes. nil elements are never ignored.
      def wovn_ignore_element?(element)
        return false unless element
        return true if element.attribute('wovn-ignore')

        class_attribute = element.attribute('class')
        return false unless class_attribute

        class_attribute.value.split.any? { |c| @ignored_class_set.include?(c) }
      end

      # Name of the terminal category the element belongs to, or nil when the
      # scraper should descend into it. The check order is significant.
      def stop_recursion_type(element)
        return 'ignore_element' if ignore_element?(element)
        return 'skip_element' if skip_element?(element)
        return 'text_element' if text_element?(element)
        return 'empty_element' if empty_element?(element)
        return 'comment_element' if comment_element?(element)

        nil
      end

      def refresh_all!
        @values_stack = nil
        refresh_values!
      end

      def refresh_values!
        @values = []
      end
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'wovnrb/services/value_agent'
|
2
|
+
|
3
|
+
module Wovnrb
  module UnifiedValues
    # Collects the pieces (tags and text) of one translatable value together
    # with the nodes they came from.
    class ValuesStack
      attr_reader :node_stack

      # @param head_path [String] xpath of the element the value hangs off
      # @param index [Integer] position of the text node under +head_path+
      #
      # Be careful: xpath indexes start at 1
      def initialize(head_path, index)
        @head_path = head_path
        @index = index
        @node_stack = []
        @src_stack = []
        @src_without_tag_stack = []
      end

      # Pushes one source fragment. The node is remembered unless it is named
      # 'text' and its content is blank.
      #
      # @param node [Nokogiri::XML::Element]
      # @param src [String]
      def add(node, src)
        @node_stack << node if node.name != 'text' || node.content.present?
        @src_stack << Wovnrb::ValueAgent.normalize_text(src)
      end

      # Pushes a text node: HTML-escaped into the tagged source, unescaped
      # into the tag-free source.
      #
      # @param node [Nokogiri::XML::Element]
      # @param dom_content [String]
      def add_text_element(node, dom_content)
        add(node, CGI.escapeHTML(dom_content))
        @src_without_tag_stack << Wovnrb::ValueAgent.normalize_text(dom_content)
      end

      # @return [Boolean] true while no fragment has been added
      def blank?
        @src_stack.empty?
      end

      # @return [String] xpath of this value
      def path
        return @head_path if @head_path.end_with?('title')

        # Ends with "text()" because some type checking takes path as a normal text when the path ends with "text()"
        text_path = "#{@head_path}/text()"

        @index == 1 ? text_path : "#{text_path}[#{@index}]"
      end

      # @return [String, nil] all fragments concatenated; nil when nothing was added
      def src
        concatenate(@src_stack)
      end

      # @return [String, nil] text fragments concatenated; nil when no text was added
      def src_without_tag
        concatenate(@src_without_tag_stack)
      end

      # @return [ValuesStack] empty stack for the next text position under the same head path
      def build_next_stack
        ValuesStack.new(@head_path, @index + 1)
      end

      private

      # Joins in a single pass (the previous inject(:+) built a new string per
      # fragment) while keeping the established nil result for an empty stack.
      def concatenate(fragments)
        fragments.join unless fragments.empty?
      end
    end
  end
end
|
data/lib/wovnrb/lang.rb
CHANGED
@@ -204,6 +204,7 @@ module Wovnrb
|
|
204
204
|
|
205
205
|
def replace_dom_values(dom, values, store, url, headers)
|
206
206
|
text_index = values['text_vals'] || {}
|
207
|
+
html_text_index = values['html_text_vals'] || {}
|
207
208
|
src_index = values['img_vals'] || {}
|
208
209
|
img_src_prefix = values['img_src_prefix'] || ''
|
209
210
|
host_aliases = values['host_aliases'] || []
|
@@ -215,7 +216,11 @@ module Wovnrb
|
|
215
216
|
replacers << LinkReplacer.new(store, pattern, headers)
|
216
217
|
end
|
217
218
|
|
218
|
-
|
219
|
+
unless html_text_index.empty?
|
220
|
+
replacers << UnifiedValues::TextReplacer.new(store, html_text_index)
|
221
|
+
else
|
222
|
+
replacers << TextReplacer.new(store, text_index)
|
223
|
+
end
|
219
224
|
replacers << MetaReplacer.new(store, text_index, pattern, headers)
|
220
225
|
replacers << InputReplacer.new(store, text_index)
|
221
226
|
replacers << ImageReplacer.new(store, url, text_index, src_index, img_src_prefix, host_aliases)
|
@@ -0,0 +1,9 @@
|
|
1
|
+
module Wovnrb
  # Text normalisation shared by the scraping and replacing code.
  class ValueAgent
    # U+FFFD REPLACEMENT CHARACTER.
    REPLACEMENT_CHARACTER = /\ufffd/.freeze

    # Runs of space, tab, line feed, form feed, carriage return and
    # ZERO WIDTH SPACE. Each code point is listed once; the earlier class
    # repeated several of them, which Ruby reports as a duplicated range.
    COLLAPSIBLE_WHITESPACE = /[ \t\n\f\r\u200B]+/.freeze

    # Leading or trailing whitespace, including the Unicode space characters
    # that `\s` does not cover. Anchored to the whole string with \A and \z.
    EDGE_WHITESPACE = /
      \A[\s\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+
      |
      [\s\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+\z
    /x.freeze

    # Normalises text so that equal content compares equal:
    # 1. every U+FFFD becomes a backspace character ("\b"),
    # 2. each run of collapsible whitespace becomes a single space,
    # 3. leading and trailing whitespace is removed.
    #
    # @param src [String]
    # @return [String] a new, normalised string
    def self.normalize_text(src)
      src.gsub(REPLACEMENT_CHARACTER, "\b")
         .gsub(COLLAPSIBLE_WHITESPACE, ' ')
         .gsub(EDGE_WHITESPACE, '')
    end
  end
end
|
data/lib/wovnrb/store.rb
CHANGED
@@ -6,6 +6,7 @@ require 'wovnrb/services/wovn_logger'
|
|
6
6
|
require 'wovnrb/services/glob'
|
7
7
|
require 'wovnrb/settings'
|
8
8
|
require 'active_support'
|
9
|
+
require 'active_support/core_ext'
|
9
10
|
|
10
11
|
module Wovnrb
|
11
12
|
class Store
|
@@ -64,43 +65,35 @@ module Wovnrb
|
|
64
65
|
errors = [];
|
65
66
|
#if valid_token?(!settings.has_key?('project_token') || settings['project_token'].length < 5 || settings['project_token'].length > 6
|
66
67
|
if !valid_token?(settings['project_token'])
|
67
|
-
valid = false
|
68
68
|
errors.push("Project token #{settings['project_token']} is not valid.")
|
69
69
|
end
|
70
70
|
if settings.has_key?('ignore_paths') && !settings['ignore_paths'].kind_of?(Array)
|
71
|
-
valid = false
|
72
71
|
errors.push("Ignore Paths #{settings['ignore_paths']} should be Array.")
|
73
72
|
end
|
74
73
|
if !settings.has_key?('url_pattern') || settings['url_pattern'].length == 0
|
75
|
-
valid = false
|
76
74
|
errors.push("Url pattern #{settings['url_pattern']} is not valid.")
|
77
75
|
end
|
78
76
|
if !settings.has_key?('query') || !settings['query'].kind_of?(Array)
|
79
|
-
valid = false
|
80
77
|
errors.push("query config #{settings['query']} is not valid.")
|
81
78
|
end
|
82
79
|
if !settings.has_key?('ignore_class') || !settings['ignore_class'].kind_of?(Array)
|
83
|
-
valid = false
|
84
80
|
errors.push("ignore_class config #{settings['ignore_class']} should be Array.")
|
85
81
|
end
|
86
82
|
if !settings.has_key?('api_url') || settings['api_url'].length == 0
|
87
|
-
valid = false
|
88
83
|
errors.push("API URL is not configured.")
|
89
84
|
end
|
90
85
|
if !settings.has_key?('default_lang') || settings['default_lang'].nil?
|
91
|
-
valid = false
|
92
86
|
errors.push("Default lang #{settings['default_lang']} is not valid.")
|
93
87
|
end
|
94
88
|
if !settings.has_key?('supported_langs') || !settings['supported_langs'].kind_of?(Array) || settings['supported_langs'].size < 1
|
95
|
-
valid = false
|
96
89
|
errors.push("Supported langs configuration is not valid.")
|
97
90
|
end
|
98
91
|
if !settings.has_key?('custom_lang_aliases') || !settings['custom_lang_aliases'].kind_of?(Hash)
|
99
|
-
valid = false
|
100
92
|
errors.push("Custom lang aliases is not valid.")
|
101
93
|
end
|
102
94
|
# log errors
|
103
95
|
if errors.length > 0
|
96
|
+
valid = false
|
104
97
|
errors.each do |e|
|
105
98
|
WovnLogger.instance.error(e)
|
106
99
|
end
|
data/lib/wovnrb/version.rb
CHANGED
@@ -0,0 +1,96 @@
|
|
1
|
+
<html>
|
2
|
+
<script>console.log('hello')</script>
|
3
|
+
<title>SIMPLE TITLE</title>
|
4
|
+
<meta name="description" content="meta description">
|
5
|
+
<body>
|
6
|
+
<div>
|
7
|
+
<form role="search">
|
8
|
+
<div class="search-control">
|
9
|
+
<input type="search" id="site-search" name="q"
|
10
|
+
placeholder="Search the site..."
|
11
|
+
aria-label="Search through site content">
|
12
|
+
<button>Search</button>
|
13
|
+
</div>
|
14
|
+
</form>
|
15
|
+
|
16
|
+
<form role="search" method="get" class="search-form wb-flex" action="https://www.wovn.io/">
|
17
|
+
<label>
|
18
|
+
<span>Search for:</span>
|
19
|
+
<input type="search" placeholder="Search …" value="" name="s" title="Search for:">
|
20
|
+
</label>
|
21
|
+
</form>
|
22
|
+
</div>
|
23
|
+
<div>
|
24
|
+
<span>
|
25
|
+
<p>Well</p>
|
26
|
+
</span>
|
27
|
+
</div>
|
28
|
+
<a class="home">
|
29
|
+
<span><h1>scrap</h1>only
|
30
|
+
<img src="/assets/hedgehog.jpg">
|
31
|
+
this too</span>
|
32
|
+
</a><br>
|
33
|
+
<span href="/" class="home">
|
34
|
+
<a><img src="/assets/hedgehog.jpg"></a>
|
35
|
+
<a>"Storm hunters" it is to see.</a>
|
36
|
+
<a>BLABLA5</a>
|
37
|
+
</span>
|
38
|
+
<div>
|
39
|
+
<form class="ftr-locator-form">
|
40
|
+
<input name="hdr-ftr-locator-term" placeholder="Street, City, State or ZIP" type="text">
|
41
|
+
<input type="submit">
|
42
|
+
|
43
|
+
</form>
|
44
|
+
</div>
|
45
|
+
<h1>
|
46
|
+
<div style="opacity: 1;">Welcome to my Website!</div>
|
47
|
+
</h1>
|
48
|
+
<div>
|
49
|
+
<div class="content-asset"><div class="heading1">LOST AF?</div>
|
50
|
+
<div class="error-info">
|
51
|
+
<p>WE'LL TAKE THE BLAME.</p>
|
52
|
+
<p>LET US MAKE IT UP TO YOU WITH SOME LINKS.</p>
|
53
|
+
</div>
|
54
|
+
|
55
|
+
<div class="shopping-link">
|
56
|
+
<nav class="gender-links">
|
57
|
+
<ul>
|
58
|
+
<li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/mens/" class="nav-link">SHOP MENS</a></li>
|
59
|
+
<li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/womens/" class="nav-link">SHOP WOMENS</a></li>
|
60
|
+
<li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/kids/boys/" class="nav-link">shop boys</a></li>
|
61
|
+
<li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/kids/girls/" class="nav-link">shop girls</a></li>
|
62
|
+
</ul>
|
63
|
+
</nav>
|
64
|
+
</div></div>
|
65
|
+
<div class="error-custom-message noshow">"<div class="heading1">LOST AF?</div>
|
66
|
+
<div class="error-info">
|
67
|
+
<p>WE'LL TAKE THE BLAME.</p>
|
68
|
+
<p>LET US MAKE IT UP TO YOU WITH SOME LINKS.</p>
|
69
|
+
</div>
|
70
|
+
|
71
|
+
<div class="shopping-link">
|
72
|
+
<nav class="gender-links">
|
73
|
+
<ul>
|
74
|
+
<li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/mens/" class="nav-link">SHOP MENS</a></li>
|
75
|
+
<li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/womens/" class="nav-link">SHOP WOMENS</a></li>
|
76
|
+
<li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/kids/boys/" class="nav-link">shop boys</a></li>
|
77
|
+
<li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/kids/girls/" class="nav-link">shop girls</a></li>
|
78
|
+
</ul>
|
79
|
+
</nav>
|
80
|
+
</div></div>"
|
81
|
+
</div>
|
82
|
+
|
83
|
+
<p>This is a paragraph! Here's how you make a link: <a href="https://neocities.org">Neocities</a>.</p>
|
84
|
+
|
85
|
+
<p>Here's how you can make <strong>bold</strong> and <em>italic</em> text.</p>
|
86
|
+
<p>Here's how you can add an image:</p>
|
87
|
+
<p>Here's how to make a list:</p>
|
88
|
+
<ul>
|
89
|
+
<li href="/">First thing</li>
|
90
|
+
<li>Second thing</li>
|
91
|
+
<li>Third thing</li>
|
92
|
+
</ul>
|
93
|
+
|
94
|
+
<p>To learn more HTML/CSS, check out these <a href="https://neocities.org/tutorials">tutorials</a>!</p>
|
95
|
+
</body>
|
96
|
+
</html>
|
@@ -0,0 +1,251 @@
|
|
1
|
+
[
|
2
|
+
{
|
3
|
+
"xpath": "/html/head/title",
|
4
|
+
"srcs": [
|
5
|
+
"SIMPLE TITLE"
|
6
|
+
]
|
7
|
+
},
|
8
|
+
{
|
9
|
+
"xpath": "/html/body/div/form/div/text()",
|
10
|
+
"srcs": [
|
11
|
+
"<input>",
|
12
|
+
"<button>",
|
13
|
+
"Search",
|
14
|
+
"</button>"
|
15
|
+
]
|
16
|
+
},
|
17
|
+
{
|
18
|
+
"xpath": "/html/body/div/form[2]/text()",
|
19
|
+
"srcs": [
|
20
|
+
"<label>",
|
21
|
+
"<span>",
|
22
|
+
"Search for:",
|
23
|
+
"</span>",
|
24
|
+
"<input>",
|
25
|
+
"</label>"
|
26
|
+
]
|
27
|
+
},
|
28
|
+
{
|
29
|
+
"xpath": "/html/body/div[2]/span/p/text()",
|
30
|
+
"srcs": [
|
31
|
+
"Well"
|
32
|
+
]
|
33
|
+
},
|
34
|
+
{
|
35
|
+
"xpath": "/html/body/a/span/h1/text()",
|
36
|
+
"srcs": [
|
37
|
+
"scrap"
|
38
|
+
]
|
39
|
+
},
|
40
|
+
{
|
41
|
+
"xpath": "/html/body/text()[4]",
|
42
|
+
"srcs": [
|
43
|
+
"only"
|
44
|
+
]
|
45
|
+
},
|
46
|
+
{
|
47
|
+
"xpath": "/html/body/text()[5]",
|
48
|
+
"srcs": [
|
49
|
+
"this too",
|
50
|
+
"</span>",
|
51
|
+
"</a>",
|
52
|
+
"<br>",
|
53
|
+
"<span>",
|
54
|
+
"<a>"
|
55
|
+
]
|
56
|
+
},
|
57
|
+
{
|
58
|
+
"xpath": "/html/body/text()[6]",
|
59
|
+
"srcs": [
|
60
|
+
"<a>",
|
61
|
+
"&quot;Storm hunters&quot; it is to see.",
|
62
|
+
"</a>",
|
63
|
+
"<a>",
|
64
|
+
"BLABLA5",
|
65
|
+
"</a>",
|
66
|
+
"</span>"
|
67
|
+
]
|
68
|
+
},
|
69
|
+
{
|
70
|
+
"xpath": "/html/body/h1/div/text()",
|
71
|
+
"srcs": [
|
72
|
+
"Welcome to my Website!"
|
73
|
+
]
|
74
|
+
},
|
75
|
+
{
|
76
|
+
"xpath": "/html/body/div[4]/div/div/text()",
|
77
|
+
"srcs": [
|
78
|
+
"LOST AF?"
|
79
|
+
]
|
80
|
+
},
|
81
|
+
{
|
82
|
+
"xpath": "/html/body/div[4]/div/div[2]/p/text()",
|
83
|
+
"srcs": [
|
84
|
+
"WE'LL TAKE THE BLAME."
|
85
|
+
]
|
86
|
+
},
|
87
|
+
{
|
88
|
+
"xpath": "/html/body/div[4]/div/div[2]/p[2]/text()",
|
89
|
+
"srcs": [
|
90
|
+
"LET US MAKE IT UP TO YOU WITH SOME LINKS."
|
91
|
+
]
|
92
|
+
},
|
93
|
+
{
|
94
|
+
"xpath": "/html/body/div[4]/div/div[3]/nav/ul/li/text()",
|
95
|
+
"srcs": [
|
96
|
+
"<a>",
|
97
|
+
"SHOP MENS",
|
98
|
+
"</a>"
|
99
|
+
]
|
100
|
+
},
|
101
|
+
{
|
102
|
+
"xpath": "/html/body/div[4]/div/div[3]/nav/ul/li[2]/text()",
|
103
|
+
"srcs": [
|
104
|
+
"<a>",
|
105
|
+
"SHOP WOMENS",
|
106
|
+
"</a>"
|
107
|
+
]
|
108
|
+
},
|
109
|
+
{
|
110
|
+
"xpath": "/html/body/div[4]/div/div[3]/nav/ul/li[3]/text()",
|
111
|
+
"srcs": [
|
112
|
+
"<a>",
|
113
|
+
"shop boys",
|
114
|
+
"</a>"
|
115
|
+
]
|
116
|
+
},
|
117
|
+
{
|
118
|
+
"xpath": "/html/body/div[4]/div/div[3]/nav/ul/li[4]/text()",
|
119
|
+
"srcs": [
|
120
|
+
"<a>",
|
121
|
+
"shop girls",
|
122
|
+
"</a>"
|
123
|
+
]
|
124
|
+
},
|
125
|
+
{
|
126
|
+
"xpath": "/html/body/div[4]/div[2]/text()",
|
127
|
+
"srcs": [
|
128
|
+
"&quot;"
|
129
|
+
]
|
130
|
+
},
|
131
|
+
{
|
132
|
+
"xpath": "/html/body/div[4]/div[2]/div/text()",
|
133
|
+
"srcs": [
|
134
|
+
"LOST AF?"
|
135
|
+
]
|
136
|
+
},
|
137
|
+
{
|
138
|
+
"xpath": "/html/body/div[4]/div[2]/div[2]/p/text()",
|
139
|
+
"srcs": [
|
140
|
+
"WE'LL TAKE THE BLAME."
|
141
|
+
]
|
142
|
+
},
|
143
|
+
{
|
144
|
+
"xpath": "/html/body/div[4]/div[2]/div[2]/p[2]/text()",
|
145
|
+
"srcs": [
|
146
|
+
"LET US MAKE IT UP TO YOU WITH SOME LINKS."
|
147
|
+
]
|
148
|
+
},
|
149
|
+
{
|
150
|
+
"xpath": "/html/body/div[4]/div[2]/div[3]/nav/ul/li/text()",
|
151
|
+
"srcs": [
|
152
|
+
"<a>",
|
153
|
+
"SHOP MENS",
|
154
|
+
"</a>"
|
155
|
+
]
|
156
|
+
},
|
157
|
+
{
|
158
|
+
"xpath": "/html/body/div[4]/div[2]/div[3]/nav/ul/li[2]/text()",
|
159
|
+
"srcs": [
|
160
|
+
"<a>",
|
161
|
+
"SHOP WOMENS",
|
162
|
+
"</a>"
|
163
|
+
]
|
164
|
+
},
|
165
|
+
{
|
166
|
+
"xpath": "/html/body/div[4]/div[2]/div[3]/nav/ul/li[3]/text()",
|
167
|
+
"srcs": [
|
168
|
+
"<a>",
|
169
|
+
"shop boys",
|
170
|
+
"</a>"
|
171
|
+
]
|
172
|
+
},
|
173
|
+
{
|
174
|
+
"xpath": "/html/body/div[4]/div[2]/div[3]/nav/ul/li[4]/text()",
|
175
|
+
"srcs": [
|
176
|
+
"<a>",
|
177
|
+
"shop girls",
|
178
|
+
"</a>"
|
179
|
+
]
|
180
|
+
},
|
181
|
+
{
|
182
|
+
"xpath": "/html/body/div[4]/text()[3]",
|
183
|
+
"srcs": [
|
184
|
+
"&quot;"
|
185
|
+
]
|
186
|
+
},
|
187
|
+
{
|
188
|
+
"xpath": "/html/body/p/text()",
|
189
|
+
"srcs": [
|
190
|
+
"This is a paragraph! Here's how you make a link:",
|
191
|
+
"<a>",
|
192
|
+
"Neocities",
|
193
|
+
"</a>",
|
194
|
+
"."
|
195
|
+
]
|
196
|
+
},
|
197
|
+
{
|
198
|
+
"xpath": "/html/body/p[2]/text()",
|
199
|
+
"srcs": [
|
200
|
+
"Here's how you can make",
|
201
|
+
"<strong>",
|
202
|
+
"bold",
|
203
|
+
"</strong>",
|
204
|
+
"and",
|
205
|
+
"<em>",
|
206
|
+
"italic",
|
207
|
+
"</em>",
|
208
|
+
"text."
|
209
|
+
]
|
210
|
+
},
|
211
|
+
{
|
212
|
+
"xpath": "/html/body/p[3]/text()",
|
213
|
+
"srcs": [
|
214
|
+
"Here's how you can add an image:"
|
215
|
+
]
|
216
|
+
},
|
217
|
+
{
|
218
|
+
"xpath": "/html/body/p[4]/text()",
|
219
|
+
"srcs": [
|
220
|
+
"Here's how to make a list:"
|
221
|
+
]
|
222
|
+
},
|
223
|
+
{
|
224
|
+
"xpath": "/html/body/ul/li/text()",
|
225
|
+
"srcs": [
|
226
|
+
"First thing"
|
227
|
+
]
|
228
|
+
},
|
229
|
+
{
|
230
|
+
"xpath": "/html/body/ul/li[2]/text()",
|
231
|
+
"srcs": [
|
232
|
+
"Second thing"
|
233
|
+
]
|
234
|
+
},
|
235
|
+
{
|
236
|
+
"xpath": "/html/body/ul/li[3]/text()",
|
237
|
+
"srcs": [
|
238
|
+
"Third thing"
|
239
|
+
]
|
240
|
+
},
|
241
|
+
{
|
242
|
+
"xpath": "/html/body/p[5]/text()",
|
243
|
+
"srcs": [
|
244
|
+
"To learn more HTML/CSS, check out these",
|
245
|
+
"<a>",
|
246
|
+
"tutorials",
|
247
|
+
"</a>",
|
248
|
+
"!"
|
249
|
+
]
|
250
|
+
}
|
251
|
+
]
|