onebox 1.4.9 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: acf0205db8c9ea7c0a4c0ded94674fae2c2a7647
4
- data.tar.gz: fd639f567d646668041ae8c28a5af22d39512fe8
3
+ metadata.gz: 5b5e72349ed5dcca5148875d8e669b3850de0311
4
+ data.tar.gz: b1d032a771038719f1c51064fee5942c50f94353
5
5
  SHA512:
6
- metadata.gz: 3f4fcf781c785773901b5507e06d746d76e3adc1975074524c8cbb3f687e4d836b7352c4618e4e55a24a42f70550df964bc507ac0d98957f84ffa41f00afacdd
7
- data.tar.gz: 2972de535e55661b17473efb5df139d5d7ef2eae4993de8683897960a4edaeae470608b0715adb1496a8e52fa090b8531cbffe06c53cbf9aaececb305072db28
6
+ metadata.gz: cd575f2a3a215a63a5c9359e788cb5c3740b8b7b1d53ad59cbdf9db2e120fc63c95e6c96432f11b38f5dbd8d9a7fff41ce722fe51c70e3843bbad2b5362be229
7
+ data.tar.gz: 6cb0cc9aef0081ba65b35221261492b44cf6ca161161d50044c1c108d7732a4f9c840867907d8ca1d9ea308430378cfe5f45909a82a31461fd0939ac85399d96
@@ -10,26 +10,66 @@ module Onebox
10
10
  private
11
11
 
12
12
  def data
13
- # get all the paras
14
- paras = raw.search("p")
13
+ paras = []
15
14
  text = ""
16
15
 
16
+ # Detect a section hash in the URL and retrieve the related paragraphs. If no hash is provided, the first few paragraphs will be used.
17
+ # Author Lidlanca
18
+ # Date 9/8/2014
19
+ if ( m_url_hash = @url.match /#([^\/?]+)/ ) #extract url hash
20
+ m_url_hash_name= m_url_hash[1]
21
+ end
22
+
23
+ unless m_url_hash.nil?
24
+ section_header_title = raw.xpath("//span[@id='#{m_url_hash_name}']")
25
+
26
+ if section_header_title.empty?
27
+ paras = raw.search("p") #default get all the paras
28
+ else
29
+ section_title_text = section_header_title.inner_text
30
+ section_header = section_header_title[0].parent #parent element of the section span element should be an <h3> node
31
+ cur_element = section_header
32
+
33
+ # p|text|div covers the general case. We assume the presence of at least 1 P node. If the section has no P node we may end up with a P node from the next section.
34
+ # The div tag is commonly used as an asset wrapper in an article section, often as the first element holding an image.
35
+ # ul support will improve the output generated for a section with a list as the main content (for example: an author's bibliography, a musician's discography, etc.)
36
+ first_p_found = nil
37
+ while ( ((next_sibling = cur_element.next_sibling).name =~ /p|text|div|ul/) || first_p_found.nil? ) do #from section header get the next sibling until it is a breaker tag
38
+ cur_element = next_sibling
39
+ if (cur_element.name == "p" || cur_element.name == "ul") #we treat a list as we detect a p to avoid showing
40
+ first_p_found = true
41
+ paras.push(cur_element)
42
+ end
43
+ end
44
+ end
45
+ else # no hash found in url
46
+ paras = raw.search("p") #default get all the paras
47
+ end
48
+
17
49
  unless paras.empty?
18
50
  cnt = 0
19
51
  while text.length < Onebox::LayoutSupport.max_text && cnt <= 3
20
52
  break if cnt >= paras.size
21
53
  text << " " unless cnt == 0
22
- paragraph = paras[cnt].inner_text[0..Onebox::LayoutSupport.max_text]
54
+
55
+ if paras[cnt].name =="ul" #Handle UL tag. Generate a textual ordered list (1.item | 2.item | 3.item). Unfortunately no newline allowed in output
56
+ li_index=1
57
+ list_items = []
58
+ paras[cnt].children.css("li").each {|li| list_items.push "#{li_index}." + li.inner_text ; li_index+=1}
59
+ paragraph = (list_items.join " |\n ")[0..Onebox::LayoutSupport.max_text]
60
+ else
61
+ paragraph = paras[cnt].inner_text[0..Onebox::LayoutSupport.max_text]
62
+ end
63
+
23
64
  paragraph.gsub!(/\[\d+\]/mi, "")
24
65
  text << paragraph
25
66
  cnt += 1
26
67
  end
27
68
  end
28
-
29
69
  text = "#{text[0..Onebox::LayoutSupport.max_text]}..." if text.length > Onebox::LayoutSupport.max_text
30
70
  result = {
31
71
  link: link,
32
- title: raw.css("html body h1").inner_text,
72
+ title: raw.css("html body h1").inner_text + (section_title_text ? " | " + section_title_text : ""), #if a section sub title exists add it to the main article title
33
73
  description: text
34
74
  }
35
75
  img = raw.css(".image img")
@@ -1,3 +1,3 @@
1
1
  module Onebox
2
- VERSION = "1.4.9"
2
+ VERSION = "1.5.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.9
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-08-29 00:00:00.000000000 Z
13
+ date: 2014-09-09 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: multi_json
@@ -342,7 +342,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
342
342
  version: '0'
343
343
  requirements: []
344
344
  rubyforge_project:
345
- rubygems_version: 2.1.11
345
+ rubygems_version: 2.2.2
346
346
  signing_key:
347
347
  specification_version: 4
348
348
  summary: A gem for turning URLs into previews.