wovnrb 1.0.13 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +1 -1
- data/lib/wovnrb.rb +7 -0
- data/lib/wovnrb/html_replacers/replacer_base.rb +2 -1
- data/lib/wovnrb/html_replacers/unified_values/dst_swapping_targets_creator.rb +76 -0
- data/lib/wovnrb/html_replacers/unified_values/element_category.rb +242 -0
- data/lib/wovnrb/html_replacers/unified_values/node_swapping_targets_creator.rb +134 -0
- data/lib/wovnrb/html_replacers/unified_values/text_replacer.rb +35 -0
- data/lib/wovnrb/html_replacers/unified_values/text_scraper.rb +152 -0
- data/lib/wovnrb/html_replacers/unified_values/values_stack.rb +65 -0
- data/lib/wovnrb/lang.rb +6 -1
- data/lib/wovnrb/services/value_agent.rb +9 -0
- data/lib/wovnrb/store.rb +2 -9
- data/lib/wovnrb/version.rb +1 -1
- data/test/fixtures/unified_values/site_html/simple_actual.html +96 -0
- data/test/fixtures/unified_values/site_html/simple_expected.json +251 -0
- data/test/fixtures/unified_values/site_html/wovn.io_actual.html +686 -0
- data/test/fixtures/unified_values/site_html/wovn.io_expected.json +543 -0
- data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_actual.html +1024 -0
- data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_expected.json +3345 -0
- data/test/fixtures/unified_values/small_html/block_inside_inline_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/block_inside_inline_expected.json +22 -0
- data/test/fixtures/unified_values/small_html/br_tag_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/br_tag_expected.json +12 -0
- data/test/fixtures/unified_values/small_html/comment_tag_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/comment_tag_expected.json +10 -0
- data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_actual.html +7 -0
- data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_expected.json +11 -0
- data/test/fixtures/unified_values/small_html/deep_nested_block_actual.html +14 -0
- data/test/fixtures/unified_values/small_html/deep_nested_block_expected.json +8 -0
- data/test/fixtures/unified_values/small_html/deep_nested_inline_actual.html +20 -0
- data/test/fixtures/unified_values/small_html/deep_nested_inline_expected.json +20 -0
- data/test/fixtures/unified_values/small_html/empty_tag_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/empty_tag_expected.json +12 -0
- data/test/fixtures/unified_values/small_html/empty_text_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/empty_text_expected.json +1 -0
- data/test/fixtures/unified_values/small_html/ignore_tag_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/ignore_tag_expected.json +16 -0
- data/test/fixtures/unified_values/small_html/ignored_class_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/ignored_class_expected.json +13 -0
- data/test/fixtures/unified_values/small_html/img_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/img_expected.json +23 -0
- data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_expected.json +16 -0
- data/test/fixtures/unified_values/small_html/nested_text_value_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/nested_text_value_expected.json +12 -0
- data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_expected.json +14 -0
- data/test/fixtures/unified_values/small_html/option_tag_actual.html +9 -0
- data/test/fixtures/unified_values/small_html/option_tag_expected.json +13 -0
- data/test/fixtures/unified_values/small_html/text_different_inline_each_other_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/text_different_inline_each_other_expected.json +22 -0
- data/test/fixtures/unified_values/small_html/text_in_svg_actual.html +9 -0
- data/test/fixtures/unified_values/small_html/text_in_svg_expected.json +8 -0
- data/test/fixtures/unified_values/small_html/text_with_html_entity_actual.html +6 -0
- data/test/fixtures/unified_values/small_html/text_with_html_entity_expected.json +8 -0
- data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_expected.json +24 -0
- data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_actual.html +12 -0
- data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_expected.json +14 -0
- data/test/fixtures/unified_values/small_html/wovn_ignore_actual.html +10 -0
- data/test/fixtures/unified_values/small_html/wovn_ignore_expected.json +13 -0
- data/test/lib/html_replacers/unified_values/dst_swapping_targets_creator_test.rb +137 -0
- data/test/lib/html_replacers/unified_values/element_category_test.rb +49 -0
- data/test/lib/html_replacers/unified_values/node_swapping_targets_creator_test.rb +137 -0
- data/test/lib/html_replacers/unified_values/text_replacer_test.rb +270 -0
- data/test/lib/html_replacers/unified_values/text_scraper_test.rb +121 -0
- data/test/lib/html_replacers/unified_values/values_stack_test.rb +122 -0
- data/test/lib/lang_test.rb +59 -1
- data/test/lib/services/value_agent_test.rb +32 -0
- data/test/test_helper.rb +18 -2
- data/wovnrb.gemspec +1 -0
- metadata +134 -7
- data/spec/spec_helper.rb +0 -2
- data/spec/wovnrb_spec.rb +0 -7
@@ -0,0 +1,22 @@
|
|
1
|
+
[
|
2
|
+
{
|
3
|
+
"xpath": "/html/body/div/text()",
|
4
|
+
"srcs": [
|
5
|
+
"<span>",
|
6
|
+
"text value1"
|
7
|
+
]
|
8
|
+
},
|
9
|
+
{
|
10
|
+
"xpath": "/html/body/div/span/p/text()",
|
11
|
+
"srcs": [
|
12
|
+
"text value2"
|
13
|
+
]
|
14
|
+
},
|
15
|
+
{
|
16
|
+
"xpath": "/html/body/div/text()[2]",
|
17
|
+
"srcs": [
|
18
|
+
"text value3",
|
19
|
+
"</span>"
|
20
|
+
]
|
21
|
+
}
|
22
|
+
]
|
@@ -0,0 +1,20 @@
|
|
1
|
+
[
|
2
|
+
{
|
3
|
+
"xpath": "/html/body/div/text()",
|
4
|
+
"srcs": [
|
5
|
+
"text value1",
|
6
|
+
"<span>",
|
7
|
+
"text value2",
|
8
|
+
"<span>",
|
9
|
+
"text value3",
|
10
|
+
"<span>",
|
11
|
+
"text value4",
|
12
|
+
"</span>",
|
13
|
+
"text value5",
|
14
|
+
"</span>",
|
15
|
+
"text value6",
|
16
|
+
"</span>",
|
17
|
+
"text value7"
|
18
|
+
]
|
19
|
+
}
|
20
|
+
]
|
@@ -0,0 +1 @@
|
|
1
|
+
[]
|
@@ -0,0 +1,23 @@
|
|
1
|
+
[
|
2
|
+
{
|
3
|
+
"xpath": "/html/body/div/a/span/h1/text()",
|
4
|
+
"srcs": [
|
5
|
+
"text value1"
|
6
|
+
]
|
7
|
+
},
|
8
|
+
{
|
9
|
+
"xpath": "/html/body/div/text()[2]",
|
10
|
+
"srcs": [
|
11
|
+
"text value2"
|
12
|
+
]
|
13
|
+
},
|
14
|
+
{
|
15
|
+
"xpath": "/html/body/div/text()[3]",
|
16
|
+
"srcs": [
|
17
|
+
"text value3",
|
18
|
+
"</span>",
|
19
|
+
"</a>",
|
20
|
+
"<br>"
|
21
|
+
]
|
22
|
+
}
|
23
|
+
]
|