onebox 1.4.9 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: acf0205db8c9ea7c0a4c0ded94674fae2c2a7647
4
- data.tar.gz: fd639f567d646668041ae8c28a5af22d39512fe8
3
+ metadata.gz: 5b5e72349ed5dcca5148875d8e669b3850de0311
4
+ data.tar.gz: b1d032a771038719f1c51064fee5942c50f94353
5
5
  SHA512:
6
- metadata.gz: 3f4fcf781c785773901b5507e06d746d76e3adc1975074524c8cbb3f687e4d836b7352c4618e4e55a24a42f70550df964bc507ac0d98957f84ffa41f00afacdd
7
- data.tar.gz: 2972de535e55661b17473efb5df139d5d7ef2eae4993de8683897960a4edaeae470608b0715adb1496a8e52fa090b8531cbffe06c53cbf9aaececb305072db28
6
+ metadata.gz: cd575f2a3a215a63a5c9359e788cb5c3740b8b7b1d53ad59cbdf9db2e120fc63c95e6c96432f11b38f5dbd8d9a7fff41ce722fe51c70e3843bbad2b5362be229
7
+ data.tar.gz: 6cb0cc9aef0081ba65b35221261492b44cf6ca161161d50044c1c108d7732a4f9c840867907d8ca1d9ea308430378cfe5f45909a82a31461fd0939ac85399d96
@@ -10,26 +10,66 @@ module Onebox
10
10
  private
11
11
 
12
12
  def data
13
- # get all the paras
14
- paras = raw.search("p")
13
+ paras = []
15
14
  text = ""
16
15
 
16
+ # Detect a section hash in the URL and retrieve the related paragraphs. If no hash is provided, the first few paragraphs will be used.
17
+ # Author Lidlanca
18
+ # Date 9/8/2014
19
+ if ( m_url_hash = @url.match /#([^\/?]+)/ ) #extract url hash
20
+ m_url_hash_name= m_url_hash[1]
21
+ end
22
+
23
+ unless m_url_hash.nil?
24
+ section_header_title = raw.xpath("//span[@id='#{m_url_hash_name}']")
25
+
26
+ if section_header_title.empty?
27
+ paras = raw.search("p") #default get all the paras
28
+ else
29
+ section_title_text = section_header_title.inner_text
30
+ section_header = section_header_title[0].parent #parent element of the section span element should be an <h3> node
31
+ cur_element = section_header
32
+
33
+ # p|text|div covers the general case. We assume the presence of at least one P node; if a section has no P node, we may end up with a P node from the next section.
34
+ # The div tag is commonly used as an assets wrapper in an article section, often as the first element holding an image.
35
+ # ul support will improve the output generated for a section with a list as its main content (for example: an author bibliography, a musician discography, etc.)
36
+ first_p_found = nil
37
+ while ( ((next_sibling = cur_element.next_sibling).name =~ /p|text|div|ul/) || first_p_found.nil? ) do #from section header get the next sibling until it is a breaker tag
38
+ cur_element = next_sibling
39
+ if (cur_element.name == "p" || cur_element.name == "ul") #we treat a list as we detect a p to avoid showing
40
+ first_p_found = true
41
+ paras.push(cur_element)
42
+ end
43
+ end
44
+ end
45
+ else # no hash found in url
46
+ paras = raw.search("p") #default get all the paras
47
+ end
48
+
17
49
  unless paras.empty?
18
50
  cnt = 0
19
51
  while text.length < Onebox::LayoutSupport.max_text && cnt <= 3
20
52
  break if cnt >= paras.size
21
53
  text << " " unless cnt == 0
22
- paragraph = paras[cnt].inner_text[0..Onebox::LayoutSupport.max_text]
54
+
55
+ if paras[cnt].name =="ul" #Handle UL tag. Generate a textual ordered list (1.item | 2.item | 3.item). Unfourtently no newline allowed in output
56
+ li_index=1
57
+ list_items = []
58
+ paras[cnt].children.css("li").each {|li| list_items.push "#{li_index}." + li.inner_text ; li_index+=1}
59
+ paragraph = (list_items.join " |\n ")[0..Onebox::LayoutSupport.max_text]
60
+ else
61
+ paragraph = paras[cnt].inner_text[0..Onebox::LayoutSupport.max_text]
62
+ end
63
+
23
64
  paragraph.gsub!(/\[\d+\]/mi, "")
24
65
  text << paragraph
25
66
  cnt += 1
26
67
  end
27
68
  end
28
-
29
69
  text = "#{text[0..Onebox::LayoutSupport.max_text]}..." if text.length > Onebox::LayoutSupport.max_text
30
70
  result = {
31
71
  link: link,
32
- title: raw.css("html body h1").inner_text,
72
+ title: raw.css("html body h1").inner_text + (section_title_text ? " | " + section_title_text : ""), #if a section sub title exists add it to the main article title
33
73
  description: text
34
74
  }
35
75
  img = raw.css(".image img")
@@ -1,3 +1,3 @@
1
1
  module Onebox
2
- VERSION = "1.4.9"
2
+ VERSION = "1.5.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onebox
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.9
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joanna Zeta
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-08-29 00:00:00.000000000 Z
13
+ date: 2014-09-09 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: multi_json
@@ -342,7 +342,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
342
342
  version: '0'
343
343
  requirements: []
344
344
  rubyforge_project:
345
- rubygems_version: 2.1.11
345
+ rubygems_version: 2.2.2
346
346
  signing_key:
347
347
  specification_version: 4
348
348
  summary: A gem for turning URLs into previews.