wovnrb 1.0.13 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +1 -1
  3. data/lib/wovnrb.rb +7 -0
  4. data/lib/wovnrb/html_replacers/replacer_base.rb +2 -1
  5. data/lib/wovnrb/html_replacers/unified_values/dst_swapping_targets_creator.rb +76 -0
  6. data/lib/wovnrb/html_replacers/unified_values/element_category.rb +242 -0
  7. data/lib/wovnrb/html_replacers/unified_values/node_swapping_targets_creator.rb +134 -0
  8. data/lib/wovnrb/html_replacers/unified_values/text_replacer.rb +35 -0
  9. data/lib/wovnrb/html_replacers/unified_values/text_scraper.rb +152 -0
  10. data/lib/wovnrb/html_replacers/unified_values/values_stack.rb +65 -0
  11. data/lib/wovnrb/lang.rb +6 -1
  12. data/lib/wovnrb/services/value_agent.rb +9 -0
  13. data/lib/wovnrb/store.rb +2 -9
  14. data/lib/wovnrb/version.rb +1 -1
  15. data/test/fixtures/unified_values/site_html/simple_actual.html +96 -0
  16. data/test/fixtures/unified_values/site_html/simple_expected.json +251 -0
  17. data/test/fixtures/unified_values/site_html/wovn.io_actual.html +686 -0
  18. data/test/fixtures/unified_values/site_html/wovn.io_expected.json +543 -0
  19. data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_actual.html +1024 -0
  20. data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_expected.json +3345 -0
  21. data/test/fixtures/unified_values/small_html/block_inside_inline_actual.html +12 -0
  22. data/test/fixtures/unified_values/small_html/block_inside_inline_expected.json +22 -0
  23. data/test/fixtures/unified_values/small_html/br_tag_actual.html +10 -0
  24. data/test/fixtures/unified_values/small_html/br_tag_expected.json +12 -0
  25. data/test/fixtures/unified_values/small_html/comment_tag_actual.html +12 -0
  26. data/test/fixtures/unified_values/small_html/comment_tag_expected.json +10 -0
  27. data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_actual.html +7 -0
  28. data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_expected.json +11 -0
  29. data/test/fixtures/unified_values/small_html/deep_nested_block_actual.html +14 -0
  30. data/test/fixtures/unified_values/small_html/deep_nested_block_expected.json +8 -0
  31. data/test/fixtures/unified_values/small_html/deep_nested_inline_actual.html +20 -0
  32. data/test/fixtures/unified_values/small_html/deep_nested_inline_expected.json +20 -0
  33. data/test/fixtures/unified_values/small_html/empty_tag_actual.html +10 -0
  34. data/test/fixtures/unified_values/small_html/empty_tag_expected.json +12 -0
  35. data/test/fixtures/unified_values/small_html/empty_text_actual.html +12 -0
  36. data/test/fixtures/unified_values/small_html/empty_text_expected.json +1 -0
  37. data/test/fixtures/unified_values/small_html/ignore_tag_actual.html +12 -0
  38. data/test/fixtures/unified_values/small_html/ignore_tag_expected.json +16 -0
  39. data/test/fixtures/unified_values/small_html/ignored_class_actual.html +10 -0
  40. data/test/fixtures/unified_values/small_html/ignored_class_expected.json +13 -0
  41. data/test/fixtures/unified_values/small_html/img_actual.html +12 -0
  42. data/test/fixtures/unified_values/small_html/img_expected.json +23 -0
  43. data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_actual.html +10 -0
  44. data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_expected.json +16 -0
  45. data/test/fixtures/unified_values/small_html/nested_text_value_actual.html +10 -0
  46. data/test/fixtures/unified_values/small_html/nested_text_value_expected.json +12 -0
  47. data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_actual.html +10 -0
  48. data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_expected.json +14 -0
  49. data/test/fixtures/unified_values/small_html/option_tag_actual.html +9 -0
  50. data/test/fixtures/unified_values/small_html/option_tag_expected.json +13 -0
  51. data/test/fixtures/unified_values/small_html/text_different_inline_each_other_actual.html +10 -0
  52. data/test/fixtures/unified_values/small_html/text_different_inline_each_other_expected.json +22 -0
  53. data/test/fixtures/unified_values/small_html/text_in_svg_actual.html +9 -0
  54. data/test/fixtures/unified_values/small_html/text_in_svg_expected.json +8 -0
  55. data/test/fixtures/unified_values/small_html/text_with_html_entity_actual.html +6 -0
  56. data/test/fixtures/unified_values/small_html/text_with_html_entity_expected.json +8 -0
  57. data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_actual.html +12 -0
  58. data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_expected.json +24 -0
  59. data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_actual.html +12 -0
  60. data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_expected.json +14 -0
  61. data/test/fixtures/unified_values/small_html/wovn_ignore_actual.html +10 -0
  62. data/test/fixtures/unified_values/small_html/wovn_ignore_expected.json +13 -0
  63. data/test/lib/html_replacers/unified_values/dst_swapping_targets_creator_test.rb +137 -0
  64. data/test/lib/html_replacers/unified_values/element_category_test.rb +49 -0
  65. data/test/lib/html_replacers/unified_values/node_swapping_targets_creator_test.rb +137 -0
  66. data/test/lib/html_replacers/unified_values/text_replacer_test.rb +270 -0
  67. data/test/lib/html_replacers/unified_values/text_scraper_test.rb +121 -0
  68. data/test/lib/html_replacers/unified_values/values_stack_test.rb +122 -0
  69. data/test/lib/lang_test.rb +59 -1
  70. data/test/lib/services/value_agent_test.rb +32 -0
  71. data/test/test_helper.rb +18 -2
  72. data/wovnrb.gemspec +1 -0
  73. metadata +134 -7
  74. data/spec/spec_helper.rb +0 -2
  75. data/spec/wovnrb_spec.rb +0 -7
@@ -0,0 +1,35 @@
1
module Wovnrb
  module UnifiedValues
    # Swaps the text nodes of a DOM for their translations, using an index of
    # unified (HTML-aware) text values.
    #
    # NOTE(review): @ignored_class_set and add_comment_node are not defined in
    # this class; presumably they come from ReplacerBase -- confirm there.
    class TextReplacer < ReplacerBase
      # @param store      passed straight through to ReplacerBase#initialize
      # @param text_index translation index handed to DstSwappingTargetsCreator
      def initialize(store, text_index)
        super(store)
        @text_index = text_index
      end

      # Replaces the content of every scraped text node that has a translation
      # for +lang+, and records the original text through add_comment_node.
      #
      # @param dom  DOM to scrape and modify in place
      # @param lang object responding to #lang_code, used as the key into the
      #             per-value translation table
      # @return the collection of scraped values with their swapping targets
      def replace(dom, lang)
        scraped_values = TextScraper.new(@ignored_class_set).run(dom)
        node_entries = NodeSwappingTargetsCreator.new(scraped_values).run!
        dst_index = DstSwappingTargetsCreator.new(@text_index).run!

        node_entries.each do |entry|
          # Each lookup step tolerates a missing level by yielding nil.
          per_lang = dst_index[entry[:dst]].try(:fetch, lang.lang_code, nil)
          first_translation = per_lang.try(:first)
          translations = first_translation.try(:fetch, 'swapping_targets', nil)
          next unless translations

          entry[:swapping_targets].each.with_index do |node, position|
            # NOTE: the swap-back logic searches for a text node and locates
            # wovn-src relative to it. If the translated text were an empty
            # string, the translated HTML would have no text node, so the widget
            # could not find wovn-src and swap back. Use "\u200b" (ZERO WIDTH
            # SPACE) instead of an empty string.
            candidate = translations[position]
            replacement = candidate.blank? ? "\u200b" : candidate
            source_text = node.content

            node.content = replacement
            add_comment_node(node, source_text)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,152 @@
1
module Wovnrb
  module UnifiedValues
    # Walks a DOM tree and collects "unified" text values: runs of text together
    # with the inline tags that surround them. A run is closed whenever an
    # ignored, skipped, or non-inline element is encountered.
    class TextScraper
      # @param ignored_class_set collection (must respond to #include?) of CSS
      #   class names whose elements are treated like wovn-ignore elements
      def initialize(ignored_class_set)
        @ignored_class_set = ignored_class_set
        @values = []
        @values_stack = nil
      end

      # Resets all state and scrapes +dom+.
      #
      # @param dom node responding to #path, #children, #name, ...
      #   (NOTE(review): presumably a Nokogiri node -- confirm against callers)
      # @return [Array<Hash>] collected values, see #create_text_value
      def run(dom)
        refresh_all!
        @values_stack = ValuesStack.new(dom.path, 1)
        scrape(dom)
      end

      # Builds one collected value.
      #
      # @param src [String] concatenated source of a run; "&nbsp;" entities
      #   (case-insensitive) are turned into spaces before normalization
      # @param node_stack [Array] nodes that contributed to the run
      # @return [Hash] with keys :dst (normalized text) and :nodes
      def create_text_value(src, node_stack)
        { dst: ValueAgent.normalize_text(src.gsub(/&nbsp;/i, ' ')), nodes: node_stack }
      end

      private

      # Processes a single node (recursing into children where appropriate).
      #
      # @return [Array<Hash>] the values accumulated so far
      def scrape(dom)
        case stop_recursion_type(dom)
        when 'ignore_element', 'skip_element'
          # Close the current run and continue with a fresh stack after it.
          next_stack = @values_stack.build_next_stack
          build_src
          @values_stack = next_stack
        when 'text_element'
          @values_stack.add_text_element(dom, dom.content)
        when 'empty_element'
          @values_stack.add(dom, empty_tag(dom))
        when 'comment_element'
          # do nothing
        when nil
          # Not a leaf-like node: descend into it.
          scrape_container(dom)
        else
          raise 'Unsupported type'
        end

        @values
      end

      # Handles a node whose children must be visited.
      #
      # Inline elements stay inside the current run, wrapped in their start and
      # end tags. Every other element (block elements and elements in no known
      # category are treated identically) closes the current run, scrapes its
      # children into a run of its own, and then resumes with the next stack.
      def scrape_container(dom)
        if inline_element?(dom)
          @values_stack.add(dom, start_tag(dom))
          scrape_children_of(dom)
          # The stack may have been replaced by an empty one while scraping the
          # children; in that case there is no open run to append the end tag to.
          @values_stack.add(dom, end_tag(dom)) unless @values_stack.blank?
        else
          next_stack = @values_stack.build_next_stack
          build_src
          @values_stack = ValuesStack.new(dom.path, 1)
          scrape_children_of(dom)
          build_src
          @values_stack = next_stack
        end
      end

      # Scrapes every child of +dom+, unless +dom+ is marked as wovn-ignore.
      def scrape_children_of(dom)
        dom.children.each { |c| scrape(c) } unless wovn_ignore_element?(dom)
      end

      # Flushes the current stack into @values (when it holds real text) and
      # clears @values_stack. Does nothing when the stack is blank.
      def build_src
        return nil if @values_stack.blank?

        src = @values_stack.src
        @values << create_text_value(src, @values_stack.node_stack) if src.present? && !tag_only?(src)
        @values_stack = nil
      end

      # @return [Boolean] true when +src+, parsed as an HTML5 fragment, has no
      #   non-blank text
      def tag_only?(src)
        Nokogiri::HTML5.fragment(src).text.blank?
      end

      # @return [String] attribute-less start tag; wovn-ignore elements keep a
      #   bare wovn-ignore attribute
      def start_tag(dom)
        if wovn_ignore_element?(dom)
          "<#{dom.name} wovn-ignore>"
        else
          "<#{dom.name}>"
        end
      end

      # @return [String] end tag for +dom+
      def end_tag(dom)
        "</#{dom.name}>"
      end

      # @return [String] representation of an empty element (same as its start tag)
      def empty_tag(dom)
        start_tag(dom)
      end

      def block_element?(element)
        ElementCategory::BLOCK_ELEMENTS.include?(element.name)
      end

      def inline_element?(element)
        ElementCategory::INLINE_ELEMENTS.include?(element.name)
      end

      def empty_element?(element)
        ElementCategory::EMPTY_ELEMENTS.include?(element.name)
      end

      def ignore_element?(element)
        ElementCategory::IGNORE_ELEMENTS.include?(element.name)
      end

      def skip_element?(element)
        ElementCategory::SKIP_ELEMENTS.include?(element.name)
      end

      def comment_element?(element)
        element.comment?
      end

      def text_element?(element)
        element.text?
      end

      # @return [Boolean] true when +element+ has a wovn-ignore attribute or one
      #   of its classes is in the ignored class set; false for nil
      def wovn_ignore_element?(element)
        return false unless element

        return true if element.attribute('wovn-ignore')

        class_attribute = element.attribute('class')
        return false unless class_attribute
        class_attribute.value.split.any? { |c| @ignored_class_set.include?(c) }
      end

      # Classifies nodes that are handled without descending into them.
      # The checks are ordered; the first match wins.
      #
      # @return [String, nil] the type name, or nil when the node's children
      #   should be visited
      def stop_recursion_type(element)
        return 'ignore_element' if ignore_element?(element)
        return 'skip_element' if skip_element?(element)
        return 'text_element' if text_element?(element)
        return 'empty_element' if empty_element?(element)
        return 'comment_element' if comment_element?(element)

        nil
      end

      def refresh_all!
        @values_stack = nil
        refresh_values!
      end

      def refresh_values!
        @values = []
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
require 'cgi'
require 'wovnrb/services/value_agent'

module Wovnrb
  module UnifiedValues
    # Accumulates the pieces (text and tag strings) of one run of content,
    # together with the nodes they came from and the path the run is rooted at.
    #
    # NOTE(review): #add and #blank? rely on ActiveSupport's present?/blank?;
    # this file does not require ActiveSupport itself.
    class ValuesStack
      attr_reader :node_stack

      # @param head_path [String]
      # @param index [Number]
      #
      # Be careful xpath's index starts with 1
      def initialize(head_path, index)
        @head_path = head_path
        @index = index
        @node_stack = []
        @src_stack = []
        @src_without_tag_stack = []
      end

      # Pushes the normalized +src+ onto the source stack. The node itself is
      # recorded too, except for text nodes whose content is blank.
      #
      # @param node [Nokogiri::XML::Element]
      # @param src [String]
      def add(node, src)
        @node_stack << node if node.name != 'text' || node.content.present?
        @src_stack << Wovnrb::ValueAgent.normalize_text(src)
      end

      # Adds a text node: its HTML-escaped content goes to the source stack and
      # its unescaped (normalized) content to the tag-less stack.
      #
      # @param node [Nokogiri::XML::Element]
      # @param dom_content [String]
      def add_text_element(node, dom_content)
        add(node, CGI.escapeHTML(dom_content))
        @src_without_tag_stack << Wovnrb::ValueAgent.normalize_text(dom_content)
      end

      # @return [Bool] true when nothing has been added yet
      def blank?
        @src_stack.blank?
      end

      # @return [String] the head path itself when it ends with "title",
      #   otherwise the head path followed by "/text()" and, for any index
      #   other than 1, an "[index]" suffix
      def path
        return @head_path if @head_path.end_with?('title')

        # Ends with "text()" because some type checking takes path as a normal text when the path ends with "text()"
        text_path = "#{@head_path}/text()"

        @index == 1 ? text_path : "#{text_path}[#{@index}]"
      end

      # @return [String, nil] all added sources concatenated in order; nil when
      #   the stack is empty
      def src
        @src_stack.inject(:+)
      end

      # @return [String, nil] all added text contents (no tags) concatenated in
      #   order; nil when no text element has been added
      def src_without_tag
        @src_without_tag_stack.inject(:+)
      end

      # @return [ValuesStack] an empty stack on the same head path with the
      #   index advanced by one
      def build_next_stack
        ValuesStack.new(@head_path, @index + 1)
      end
    end
  end
end
data/lib/wovnrb/lang.rb CHANGED
@@ -204,6 +204,7 @@ module Wovnrb
204
204
 
205
205
  def replace_dom_values(dom, values, store, url, headers)
206
206
  text_index = values['text_vals'] || {}
207
+ html_text_index = values['html_text_vals'] || {}
207
208
  src_index = values['img_vals'] || {}
208
209
  img_src_prefix = values['img_src_prefix'] || ''
209
210
  host_aliases = values['host_aliases'] || []
@@ -215,7 +216,11 @@ module Wovnrb
215
216
  replacers << LinkReplacer.new(store, pattern, headers)
216
217
  end
217
218
 
218
- replacers << TextReplacer.new(store, text_index)
219
+ unless html_text_index.empty?
220
+ replacers << UnifiedValues::TextReplacer.new(store, html_text_index)
221
+ else
222
+ replacers << TextReplacer.new(store, text_index)
223
+ end
219
224
  replacers << MetaReplacer.new(store, text_index, pattern, headers)
220
225
  replacers << InputReplacer.new(store, text_index)
221
226
  replacers << ImageReplacer.new(store, url, text_index, src_index, img_src_prefix, host_aliases)
@@ -0,0 +1,9 @@
1
module Wovnrb
  # Text normalization helpers.
  class ValueAgent
    class << self
      # Normalizes +src+ in three passes:
      #   1. every U+FFFD character becomes a backspace character ("\b"),
      #   2. each run of the listed whitespace characters (including U+200B)
      #      collapses to a single space,
      #   3. leading and trailing whitespace, including the listed Unicode
      #      space characters, is removed.
      #
      # @param src [String]
      # @return [String] a new, normalized string
      def normalize_text(src)
        replaced = src.gsub(/[\ufffd]/, "\b")
        collapsed = replaced.gsub(/[\n \t\u0020\u0009\u000C\u200B\u000D\u000A]+/, ' ')
        collapsed.gsub(/^[\s\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+|[\s\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+$/, '')
      end
    end
  end
end
data/lib/wovnrb/store.rb CHANGED
@@ -6,6 +6,7 @@ require 'wovnrb/services/wovn_logger'
6
6
  require 'wovnrb/services/glob'
7
7
  require 'wovnrb/settings'
8
8
  require 'active_support'
9
+ require 'active_support/core_ext'
9
10
 
10
11
  module Wovnrb
11
12
  class Store
@@ -64,43 +65,35 @@ module Wovnrb
64
65
  errors = [];
65
66
  #if valid_token?(!settings.has_key?('project_token') || settings['project_token'].length < 5 || settings['project_token'].length > 6
66
67
  if !valid_token?(settings['project_token'])
67
- valid = false
68
68
  errors.push("Project token #{settings['project_token']} is not valid.")
69
69
  end
70
70
  if settings.has_key?('ignore_paths') && !settings['ignore_paths'].kind_of?(Array)
71
- valid = false
72
71
  errors.push("Ignore Paths #{settings['ignore_paths']} should be Array.")
73
72
  end
74
73
  if !settings.has_key?('url_pattern') || settings['url_pattern'].length == 0
75
- valid = false
76
74
  errors.push("Url pattern #{settings['url_pattern']} is not valid.")
77
75
  end
78
76
  if !settings.has_key?('query') || !settings['query'].kind_of?(Array)
79
- valid = false
80
77
  errors.push("query config #{settings['query']} is not valid.")
81
78
  end
82
79
  if !settings.has_key?('ignore_class') || !settings['ignore_class'].kind_of?(Array)
83
- valid = false
84
80
  errors.push("ignore_class config #{settings['ignore_class']} should be Array.")
85
81
  end
86
82
  if !settings.has_key?('api_url') || settings['api_url'].length == 0
87
- valid = false
88
83
  errors.push("API URL is not configured.")
89
84
  end
90
85
  if !settings.has_key?('default_lang') || settings['default_lang'].nil?
91
- valid = false
92
86
  errors.push("Default lang #{settings['default_lang']} is not valid.")
93
87
  end
94
88
  if !settings.has_key?('supported_langs') || !settings['supported_langs'].kind_of?(Array) || settings['supported_langs'].size < 1
95
- valid = false
96
89
  errors.push("Supported langs configuration is not valid.")
97
90
  end
98
91
  if !settings.has_key?('custom_lang_aliases') || !settings['custom_lang_aliases'].kind_of?(Hash)
99
- valid = false
100
92
  errors.push("Custom lang aliases is not valid.")
101
93
  end
102
94
  # log errors
103
95
  if errors.length > 0
96
+ valid = false
104
97
  errors.each do |e|
105
98
  WovnLogger.instance.error(e)
106
99
  end
@@ -1,3 +1,3 @@
1
1
  module Wovnrb
2
- VERSION = "1.0.13"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -0,0 +1,96 @@
1
+ <html>
2
+ <script>console.log('hello')</script>
3
+ <title>SIMPLE TITLE</title>
4
+ <meta name="description" content="meta description">
5
+ <body>
6
+ <div>
7
+ <form role="search">
8
+ <div class="search-control">
9
+ <input type="search" id="site-search" name="q"
10
+ placeholder="Search the site..."
11
+ aria-label="Search through site content">
12
+ <button>Search</button>
13
+ </div>
14
+ </form>
15
+
16
+ <form role="search" method="get" class="search-form wb-flex" action="https://www.wovn.io/">
17
+ <label>
18
+ <span>Search for:</span>
19
+ <input type="search" placeholder="Search …" value="" name="s" title="Search for:">
20
+ </label>
21
+ </form>
22
+ </div>
23
+ <div>
24
+ <span>
25
+ <p>Well</p>
26
+ </span>
27
+ </div>
28
+ <a class="home">
29
+ <span><h1>scrap</h1>only
30
+ <img src="/assets/hedgehog.jpg">
31
+ this too</span>
32
+ </a><br>
33
+ <span href="/" class="home">
34
+ <a><img src="/assets/hedgehog.jpg"></a>
35
+ <a>"Storm hunters" it is to see.</a>
36
+ <a>BLABLA5</a>
37
+ </span>
38
+ <div>
39
+ <form class="ftr-locator-form">
40
+ <input name="hdr-ftr-locator-term" placeholder="Street, City, State or ZIP" type="text">
41
+ <input type="submit">
42
+
43
+ </form>
44
+ </div>
45
+ <h1>
46
+ <div style="opacity: 1;">Welcome to my Website!</div>
47
+ </h1>
48
+ <div>
49
+ <div class="content-asset"><div class="heading1">LOST AF?</div>
50
+ <div class="error-info">
51
+ <p>WE'LL TAKE THE BLAME.</p>
52
+ <p>LET US MAKE IT UP TO YOU WITH SOME LINKS.</p>
53
+ </div>
54
+
55
+ <div class="shopping-link">
56
+ <nav class="gender-links">
57
+ <ul>
58
+ <li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/mens/" class="nav-link">SHOP MENS</a></li>
59
+ <li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/womens/" class="nav-link">SHOP WOMENS</a></li>
60
+ <li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/kids/boys/" class="nav-link">shop boys</a></li>
61
+ <li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/kids/girls/" class="nav-link">shop girls</a></li>
62
+ </ul>
63
+ </nav>
64
+ </div></div>
65
+ <div class="error-custom-message noshow">"<div class="heading1">LOST AF?</div>
66
+ <div class="error-info">
67
+ <p>WE'LL TAKE THE BLAME.</p>
68
+ <p>LET US MAKE IT UP TO YOU WITH SOME LINKS.</p>
69
+ </div>
70
+
71
+ <div class="shopping-link">
72
+ <nav class="gender-links">
73
+ <ul>
74
+ <li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/mens/" class="nav-link">SHOP MENS</a></li>
75
+ <li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/womens/" class="nav-link">SHOP WOMENS</a></li>
76
+ <li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/kids/boys/" class="nav-link">shop boys</a></li>
77
+ <li class="link-text"><a href="https://www.abercrombie.co.jp/en_JP/kids/girls/" class="nav-link">shop girls</a></li>
78
+ </ul>
79
+ </nav>
80
+ </div></div>"
81
+ </div>
82
+
83
+ <p>This is a paragraph! Here's how you make a link: <a href="https://neocities.org">Neocities</a>.</p>
84
+
85
+ <p>Here's how you can make <strong>bold</strong> and <em>italic</em> text.</p>
86
+ <p>Here's how you can add an image:</p>
87
+ <p>Here's how to make a list:</p>
88
+ <ul>
89
+ <li href="/">First thing</li>
90
+ <li>Second thing</li>
91
+ <li>Third thing</li>
92
+ </ul>
93
+
94
+ <p>To learn more HTML/CSS, check out these <a href="https://neocities.org/tutorials">tutorials</a>!</p>
95
+ </body>
96
+ </html>
@@ -0,0 +1,251 @@
1
+ [
2
+ {
3
+ "xpath": "/html/head/title",
4
+ "srcs": [
5
+ "SIMPLE TITLE"
6
+ ]
7
+ },
8
+ {
9
+ "xpath": "/html/body/div/form/div/text()",
10
+ "srcs": [
11
+ "<input>",
12
+ "<button>",
13
+ "Search",
14
+ "</button>"
15
+ ]
16
+ },
17
+ {
18
+ "xpath": "/html/body/div/form[2]/text()",
19
+ "srcs": [
20
+ "<label>",
21
+ "<span>",
22
+ "Search for:",
23
+ "</span>",
24
+ "<input>",
25
+ "</label>"
26
+ ]
27
+ },
28
+ {
29
+ "xpath": "/html/body/div[2]/span/p/text()",
30
+ "srcs": [
31
+ "Well"
32
+ ]
33
+ },
34
+ {
35
+ "xpath": "/html/body/a/span/h1/text()",
36
+ "srcs": [
37
+ "scrap"
38
+ ]
39
+ },
40
+ {
41
+ "xpath": "/html/body/text()[4]",
42
+ "srcs": [
43
+ "only"
44
+ ]
45
+ },
46
+ {
47
+ "xpath": "/html/body/text()[5]",
48
+ "srcs": [
49
+ "this too",
50
+ "</span>",
51
+ "</a>",
52
+ "<br>",
53
+ "<span>",
54
+ "<a>"
55
+ ]
56
+ },
57
+ {
58
+ "xpath": "/html/body/text()[6]",
59
+ "srcs": [
60
+ "<a>",
61
+ "&quot;Storm hunters&quot; it is to see.",
62
+ "</a>",
63
+ "<a>",
64
+ "BLABLA5",
65
+ "</a>",
66
+ "</span>"
67
+ ]
68
+ },
69
+ {
70
+ "xpath": "/html/body/h1/div/text()",
71
+ "srcs": [
72
+ "Welcome to my Website!"
73
+ ]
74
+ },
75
+ {
76
+ "xpath": "/html/body/div[4]/div/div/text()",
77
+ "srcs": [
78
+ "LOST AF?"
79
+ ]
80
+ },
81
+ {
82
+ "xpath": "/html/body/div[4]/div/div[2]/p/text()",
83
+ "srcs": [
84
+ "WE&#39;LL TAKE THE BLAME."
85
+ ]
86
+ },
87
+ {
88
+ "xpath": "/html/body/div[4]/div/div[2]/p[2]/text()",
89
+ "srcs": [
90
+ "LET US MAKE IT UP TO YOU WITH SOME LINKS."
91
+ ]
92
+ },
93
+ {
94
+ "xpath": "/html/body/div[4]/div/div[3]/nav/ul/li/text()",
95
+ "srcs": [
96
+ "<a>",
97
+ "SHOP MENS",
98
+ "</a>"
99
+ ]
100
+ },
101
+ {
102
+ "xpath": "/html/body/div[4]/div/div[3]/nav/ul/li[2]/text()",
103
+ "srcs": [
104
+ "<a>",
105
+ "SHOP WOMENS",
106
+ "</a>"
107
+ ]
108
+ },
109
+ {
110
+ "xpath": "/html/body/div[4]/div/div[3]/nav/ul/li[3]/text()",
111
+ "srcs": [
112
+ "<a>",
113
+ "shop boys",
114
+ "</a>"
115
+ ]
116
+ },
117
+ {
118
+ "xpath": "/html/body/div[4]/div/div[3]/nav/ul/li[4]/text()",
119
+ "srcs": [
120
+ "<a>",
121
+ "shop girls",
122
+ "</a>"
123
+ ]
124
+ },
125
+ {
126
+ "xpath": "/html/body/div[4]/div[2]/text()",
127
+ "srcs": [
128
+ "&quot;"
129
+ ]
130
+ },
131
+ {
132
+ "xpath": "/html/body/div[4]/div[2]/div/text()",
133
+ "srcs": [
134
+ "LOST AF?"
135
+ ]
136
+ },
137
+ {
138
+ "xpath": "/html/body/div[4]/div[2]/div[2]/p/text()",
139
+ "srcs": [
140
+ "WE&#39;LL TAKE THE BLAME."
141
+ ]
142
+ },
143
+ {
144
+ "xpath": "/html/body/div[4]/div[2]/div[2]/p[2]/text()",
145
+ "srcs": [
146
+ "LET US MAKE IT UP TO YOU WITH SOME LINKS."
147
+ ]
148
+ },
149
+ {
150
+ "xpath": "/html/body/div[4]/div[2]/div[3]/nav/ul/li/text()",
151
+ "srcs": [
152
+ "<a>",
153
+ "SHOP MENS",
154
+ "</a>"
155
+ ]
156
+ },
157
+ {
158
+ "xpath": "/html/body/div[4]/div[2]/div[3]/nav/ul/li[2]/text()",
159
+ "srcs": [
160
+ "<a>",
161
+ "SHOP WOMENS",
162
+ "</a>"
163
+ ]
164
+ },
165
+ {
166
+ "xpath": "/html/body/div[4]/div[2]/div[3]/nav/ul/li[3]/text()",
167
+ "srcs": [
168
+ "<a>",
169
+ "shop boys",
170
+ "</a>"
171
+ ]
172
+ },
173
+ {
174
+ "xpath": "/html/body/div[4]/div[2]/div[3]/nav/ul/li[4]/text()",
175
+ "srcs": [
176
+ "<a>",
177
+ "shop girls",
178
+ "</a>"
179
+ ]
180
+ },
181
+ {
182
+ "xpath": "/html/body/div[4]/text()[3]",
183
+ "srcs": [
184
+ "&quot;"
185
+ ]
186
+ },
187
+ {
188
+ "xpath": "/html/body/p/text()",
189
+ "srcs": [
190
+ "This is a paragraph! Here&#39;s how you make a link:",
191
+ "<a>",
192
+ "Neocities",
193
+ "</a>",
194
+ "."
195
+ ]
196
+ },
197
+ {
198
+ "xpath": "/html/body/p[2]/text()",
199
+ "srcs": [
200
+ "Here&#39;s how you can make",
201
+ "<strong>",
202
+ "bold",
203
+ "</strong>",
204
+ "and",
205
+ "<em>",
206
+ "italic",
207
+ "</em>",
208
+ "text."
209
+ ]
210
+ },
211
+ {
212
+ "xpath": "/html/body/p[3]/text()",
213
+ "srcs": [
214
+ "Here&#39;s how you can add an image:"
215
+ ]
216
+ },
217
+ {
218
+ "xpath": "/html/body/p[4]/text()",
219
+ "srcs": [
220
+ "Here&#39;s how to make a list:"
221
+ ]
222
+ },
223
+ {
224
+ "xpath": "/html/body/ul/li/text()",
225
+ "srcs": [
226
+ "First thing"
227
+ ]
228
+ },
229
+ {
230
+ "xpath": "/html/body/ul/li[2]/text()",
231
+ "srcs": [
232
+ "Second thing"
233
+ ]
234
+ },
235
+ {
236
+ "xpath": "/html/body/ul/li[3]/text()",
237
+ "srcs": [
238
+ "Third thing"
239
+ ]
240
+ },
241
+ {
242
+ "xpath": "/html/body/p[5]/text()",
243
+ "srcs": [
244
+ "To learn more HTML/CSS, check out these",
245
+ "<a>",
246
+ "tutorials",
247
+ "</a>",
248
+ "!"
249
+ ]
250
+ }
251
+ ]