wovnrb 1.0.13 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +1 -1
  3. data/lib/wovnrb.rb +7 -0
  4. data/lib/wovnrb/html_replacers/replacer_base.rb +2 -1
  5. data/lib/wovnrb/html_replacers/unified_values/dst_swapping_targets_creator.rb +76 -0
  6. data/lib/wovnrb/html_replacers/unified_values/element_category.rb +242 -0
  7. data/lib/wovnrb/html_replacers/unified_values/node_swapping_targets_creator.rb +134 -0
  8. data/lib/wovnrb/html_replacers/unified_values/text_replacer.rb +35 -0
  9. data/lib/wovnrb/html_replacers/unified_values/text_scraper.rb +152 -0
  10. data/lib/wovnrb/html_replacers/unified_values/values_stack.rb +65 -0
  11. data/lib/wovnrb/lang.rb +6 -1
  12. data/lib/wovnrb/services/value_agent.rb +9 -0
  13. data/lib/wovnrb/store.rb +2 -9
  14. data/lib/wovnrb/version.rb +1 -1
  15. data/test/fixtures/unified_values/site_html/simple_actual.html +96 -0
  16. data/test/fixtures/unified_values/site_html/simple_expected.json +251 -0
  17. data/test/fixtures/unified_values/site_html/wovn.io_actual.html +686 -0
  18. data/test/fixtures/unified_values/site_html/wovn.io_expected.json +543 -0
  19. data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_actual.html +1024 -0
  20. data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_expected.json +3345 -0
  21. data/test/fixtures/unified_values/small_html/block_inside_inline_actual.html +12 -0
  22. data/test/fixtures/unified_values/small_html/block_inside_inline_expected.json +22 -0
  23. data/test/fixtures/unified_values/small_html/br_tag_actual.html +10 -0
  24. data/test/fixtures/unified_values/small_html/br_tag_expected.json +12 -0
  25. data/test/fixtures/unified_values/small_html/comment_tag_actual.html +12 -0
  26. data/test/fixtures/unified_values/small_html/comment_tag_expected.json +10 -0
  27. data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_actual.html +7 -0
  28. data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_expected.json +11 -0
  29. data/test/fixtures/unified_values/small_html/deep_nested_block_actual.html +14 -0
  30. data/test/fixtures/unified_values/small_html/deep_nested_block_expected.json +8 -0
  31. data/test/fixtures/unified_values/small_html/deep_nested_inline_actual.html +20 -0
  32. data/test/fixtures/unified_values/small_html/deep_nested_inline_expected.json +20 -0
  33. data/test/fixtures/unified_values/small_html/empty_tag_actual.html +10 -0
  34. data/test/fixtures/unified_values/small_html/empty_tag_expected.json +12 -0
  35. data/test/fixtures/unified_values/small_html/empty_text_actual.html +12 -0
  36. data/test/fixtures/unified_values/small_html/empty_text_expected.json +1 -0
  37. data/test/fixtures/unified_values/small_html/ignore_tag_actual.html +12 -0
  38. data/test/fixtures/unified_values/small_html/ignore_tag_expected.json +16 -0
  39. data/test/fixtures/unified_values/small_html/ignored_class_actual.html +10 -0
  40. data/test/fixtures/unified_values/small_html/ignored_class_expected.json +13 -0
  41. data/test/fixtures/unified_values/small_html/img_actual.html +12 -0
  42. data/test/fixtures/unified_values/small_html/img_expected.json +23 -0
  43. data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_actual.html +10 -0
  44. data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_expected.json +16 -0
  45. data/test/fixtures/unified_values/small_html/nested_text_value_actual.html +10 -0
  46. data/test/fixtures/unified_values/small_html/nested_text_value_expected.json +12 -0
  47. data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_actual.html +10 -0
  48. data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_expected.json +14 -0
  49. data/test/fixtures/unified_values/small_html/option_tag_actual.html +9 -0
  50. data/test/fixtures/unified_values/small_html/option_tag_expected.json +13 -0
  51. data/test/fixtures/unified_values/small_html/text_different_inline_each_other_actual.html +10 -0
  52. data/test/fixtures/unified_values/small_html/text_different_inline_each_other_expected.json +22 -0
  53. data/test/fixtures/unified_values/small_html/text_in_svg_actual.html +9 -0
  54. data/test/fixtures/unified_values/small_html/text_in_svg_expected.json +8 -0
  55. data/test/fixtures/unified_values/small_html/text_with_html_entity_actual.html +6 -0
  56. data/test/fixtures/unified_values/small_html/text_with_html_entity_expected.json +8 -0
  57. data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_actual.html +12 -0
  58. data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_expected.json +24 -0
  59. data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_actual.html +12 -0
  60. data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_expected.json +14 -0
  61. data/test/fixtures/unified_values/small_html/wovn_ignore_actual.html +10 -0
  62. data/test/fixtures/unified_values/small_html/wovn_ignore_expected.json +13 -0
  63. data/test/lib/html_replacers/unified_values/dst_swapping_targets_creator_test.rb +137 -0
  64. data/test/lib/html_replacers/unified_values/element_category_test.rb +49 -0
  65. data/test/lib/html_replacers/unified_values/node_swapping_targets_creator_test.rb +137 -0
  66. data/test/lib/html_replacers/unified_values/text_replacer_test.rb +270 -0
  67. data/test/lib/html_replacers/unified_values/text_scraper_test.rb +121 -0
  68. data/test/lib/html_replacers/unified_values/values_stack_test.rb +122 -0
  69. data/test/lib/lang_test.rb +59 -1
  70. data/test/lib/services/value_agent_test.rb +32 -0
  71. data/test/test_helper.rb +18 -2
  72. data/wovnrb.gemspec +1 -0
  73. metadata +134 -7
  74. data/spec/spec_helper.rb +0 -2
  75. data/spec/wovnrb_spec.rb +0 -7
@@ -0,0 +1,12 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <span>
6
+ text value1
7
+ <p>text value2</p>
8
+ text value3
9
+ </span>
10
+ </div>
11
+ </body>
12
+ </html>
@@ -0,0 +1,22 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/div/text()",
4
+ "srcs": [
5
+ "<span>",
6
+ "text value1"
7
+ ]
8
+ },
9
+ {
10
+ "xpath": "/html/body/div/span/p/text()",
11
+ "srcs": [
12
+ "text value2"
13
+ ]
14
+ },
15
+ {
16
+ "xpath": "/html/body/div/text()[2]",
17
+ "srcs": [
18
+ "text value3",
19
+ "</span>"
20
+ ]
21
+ }
22
+ ]
@@ -0,0 +1,10 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <span>
6
+ text value1<br>text value2
7
+ </span>
8
+ </div>
9
+ </body>
10
+ </html>
@@ -0,0 +1,12 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/div/text()",
4
+ "srcs": [
5
+ "<span>",
6
+ "text value1",
7
+ "<br>",
8
+ "text value2",
9
+ "</span>"
10
+ ]
11
+ }
12
+ ]
@@ -0,0 +1,12 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <span>
6
+ <!-- <span>comment1</span>-->
7
+ text value
8
+ <!-- <span>comment2</span>-->
9
+ </span>
10
+ </div>
11
+ </body>
12
+ </html>
@@ -0,0 +1,10 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/div/text()",
4
+ "srcs": [
5
+ "<span>",
6
+ "text value",
7
+ "</span>"
8
+ ]
9
+ }
10
+ ]
@@ -0,0 +1,7 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <p>&nbsp;</p>
5
+ <p>a<span>This is &lt;a&gt; text value!</span></p>
6
+ </body>
7
+ </html>
@@ -0,0 +1,11 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/p[2]/text()",
4
+ "srcs": [
5
+ "a",
6
+ "<span>",
7
+ "This is &lt;a&gt; text value!",
8
+ "</span>"
9
+ ]
10
+ }
11
+ ]
@@ -0,0 +1,14 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <div>
6
+ <div>
7
+ <div>
8
+ text value1
9
+ </div>
10
+ </div>
11
+ </div>
12
+ </div>
13
+ </body>
14
+ </html>
@@ -0,0 +1,8 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/div/div/div/div/text()",
4
+ "srcs": [
5
+ "text value1"
6
+ ]
7
+ }
8
+ ]
@@ -0,0 +1,20 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ text value1
6
+ <span>
7
+ text value2
8
+ <span>
9
+ text value3
10
+ <span>
11
+ text value4
12
+ </span>
13
+ text value5
14
+ </span>
15
+ text value6
16
+ </span>
17
+ text value7
18
+ </div>
19
+ </body>
20
+ </html>
@@ -0,0 +1,20 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/div/text()",
4
+ "srcs": [
5
+ "text value1",
6
+ "<span>",
7
+ "text value2",
8
+ "<span>",
9
+ "text value3",
10
+ "<span>",
11
+ "text value4",
12
+ "</span>",
13
+ "text value5",
14
+ "</span>",
15
+ "text value6",
16
+ "</span>",
17
+ "text value7"
18
+ ]
19
+ }
20
+ ]
@@ -0,0 +1,10 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <span>
6
+ text value1<br>text value2
7
+ </span>
8
+ </div>
9
+ </body>
10
+ </html>
@@ -0,0 +1,12 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/div/text()",
4
+ "srcs": [
5
+ "<span>",
6
+ "text value1",
7
+ "<br>",
8
+ "text value2",
9
+ "</span>"
10
+ ]
11
+ }
12
+ ]
@@ -0,0 +1,12 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <div>
6
+ <span>
7
+ <p></p>
8
+ </span>
9
+ </div>
10
+ </div>
11
+ </body>
12
+ </html>
@@ -0,0 +1,12 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <span>
6
+ text value1
7
+ <img src='sample.jpg'/>
8
+ text value2
9
+ </span>
10
+ </div>
11
+ </body>
12
+ </html>
@@ -0,0 +1,16 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/div/text()",
4
+ "srcs": [
5
+ "<span>",
6
+ "text value1"
7
+ ]
8
+ },
9
+ {
10
+ "xpath": "/html/body/div/text()[2]",
11
+ "srcs": [
12
+ "text value2",
13
+ "</span>"
14
+ ]
15
+ }
16
+ ]
@@ -0,0 +1,10 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <span>
6
+ This is a <a class="ignore-me im-fine">complex</a> text value!
7
+ </span>
8
+ </div>
9
+ </body>
10
+ </html>
@@ -0,0 +1,13 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/div/text()",
4
+ "srcs": [
5
+ "<span>",
6
+ "This is a",
7
+ "<a wovn-ignore>",
8
+ "</a>",
9
+ "text value!",
10
+ "</span>"
11
+ ]
12
+ }
13
+ ]
@@ -0,0 +1,12 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <a class="home">
6
+ <span><h1>text value1</h1>text value2
7
+ <img src="/neocities.png">
8
+ text value3</span>
9
+ </a><br>
10
+ </div>
11
+ </body>
12
+ </html>
@@ -0,0 +1,23 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/div/a/span/h1/text()",
4
+ "srcs": [
5
+ "text value1"
6
+ ]
7
+ },
8
+ {
9
+ "xpath": "/html/body/div/text()[2]",
10
+ "srcs": [
11
+ "text value2"
12
+ ]
13
+ },
14
+ {
15
+ "xpath": "/html/body/div/text()[3]",
16
+ "srcs": [
17
+ "text value3",
18
+ "</span>",
19
+ "</a>",
20
+ "<br>"
21
+ ]
22
+ }
23
+ ]
@@ -0,0 +1,10 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <span>
6
+ This is a <b>complex <span wovn-ignore>ai<a>ue</a>o</span></b> text value!
7
+ </span>
8
+ </div>
9
+ </body>
10
+ </html>
@@ -0,0 +1,16 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/div/text()",
4
+ "srcs": [
5
+ "<span>",
6
+ "This is a",
7
+ "<b>",
8
+ "complex",
9
+ "<span wovn-ignore>",
10
+ "</span>",
11
+ "</b>",
12
+ "text value!",
13
+ "</span>"
14
+ ]
15
+ }
16
+ ]
@@ -0,0 +1,10 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <span>
6
+ <b>text value!</b>
7
+ </span>
8
+ </div>
9
+ </body>
10
+ </html>
@@ -0,0 +1,12 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/div/text()",
4
+ "srcs": [
5
+ "<span>",
6
+ "<b>",
7
+ "text value!",
8
+ "</b>",
9
+ "</span>"
10
+ ]
11
+ }
12
+ ]
@@ -0,0 +1,10 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <span>
6
+ This is a <b>complex</b> text value!
7
+ </span>
8
+ </div>
9
+ </body>
10
+ </html>
@@ -0,0 +1,14 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/div/text()",
4
+ "srcs": [
5
+ "<span>",
6
+ "This is a",
7
+ "<b>",
8
+ "complex",
9
+ "</b>",
10
+ "text value!",
11
+ "</span>"
12
+ ]
13
+ }
14
+ ]
@@ -0,0 +1,9 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <select>
5
+ <option value="a">text value1</option>
6
+ <option value="b">text value2</option>
7
+ </select>
8
+ </body>
9
+ </html>
@@ -0,0 +1,13 @@
1
+ [
2
+ {
3
+ "xpath": "/html/body/select/text()",
4
+ "srcs": [
5
+ "<option>",
6
+ "text value1",
7
+ "</option>",
8
+ "<option>",
9
+ "text value2",
10
+ "</option>"
11
+ ]
12
+ }
13
+ ]
@@ -0,0 +1,10 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div>
5
+ <span><a href='#'>text value1</a></span>
6
+ <p></p>
7
+ <strong><em>text value1</em></strong>
8
+ </div>
9
+ </body>
10
+ </html>