markdownr 0.5.6 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6e04e007ceb9d03b7cadc1cfbb9b7d6d4df51f83d82fecba4fc22d52623b50fe
4
- data.tar.gz: ed3c37ef77810c9cd2589314d9d3239fb77f48e0ac3e1ede670146db9ff2c1bc
3
+ metadata.gz: 44c223ecfa0f81c35d2d3d89a5c316eb2adc7f0a343db241185dc828268d3527
4
+ data.tar.gz: 7b87a1fed84d540c89bc92bea8de754eee3d680ba93f952ad4b23c7150aa06b3
5
5
  SHA512:
6
- metadata.gz: dc882e485d1eadb2415443b5056dca89534fe1b561118d508060b126b53332a507ee2ef375faa648444cdc0ef394179de172acdee13981d30d8f89737dba6bed
7
- data.tar.gz: 0feb715779946a7456559618e3493986520dcbbeeab7714caa7049e6d4d196a90471fbd90102e9b060ddee7c9f2028fbf6ab5215e9de85c0b793ec4d9776e18c
6
+ metadata.gz: 8eb259d062a13af47d3126ea380777e01aba1f1e6bc9401fb2f05c8277e9a827d6e16f9299e96768236574114875c0f4d2b5ab273094513c71670afa765f3e12
7
+ data.tar.gz: 56fda7d165fecbec5b46d8a496d257aa38164684779225bfac6e1d9da00df3fb8da3e90fbd4c58c435f2680a21869271f06c4b3791df48b9ac1b6d64e7359277
@@ -580,6 +580,145 @@ module MarkdownServer
580
580
  out.length > 10_000 ? out[0, 10_000] : out
581
581
  end
582
582
 
583
# Renders a Blue Letter Bible lexicon page as a compact HTML fragment.
#
# The fragment is the concatenation of four sections, in this order: info
# table, inflections, biblical-usage outline, concordance. The info table is
# always emitted; each of the other sections is an empty string when its
# markup is not found in +html+.
#
# The scraped page is treated as untrusted input: every piece of page text is
# either passed through +h+ or normalised by +blb_escape_text+ before being
# interpolated into the generated markup.
#
# @param html [String] raw HTML of the lexicon page
# @param url  [String] address the page was fetched from (not used here)
# @return [String] HTML fragment
def blueletterbible_html(html, url)
  base = "https://www.blueletterbible.org"
  blb_info_table(html, base) + blb_inflections(html, base) +
    blb_biblical_usage(html) + blb_concordance(html, base)
end

# Removes every tag from +fragment+ and trims it; nil becomes "".
def blb_plain(fragment)
  fragment.to_s.gsub(/<[^>]+>/, "").strip
end

# Makes scraped text safe to embed as HTML text content without decoding it.
# Existing entity references are kept as they are (they are already valid
# HTML), bare ampersands are escaped, and any stray angle bracket that
# survived tag stripping is escaped so it can never open a tag.
def blb_escape_text(text)
  text.gsub(/&(?!(?:#\d+|#x[\da-f]+|[a-z][a-z\d]*);)/i, "&amp;")
      .gsub("<", "&lt;")
      .gsub(">", "&gt;")
end

# Builds the "play pronunciation" button, or "" when the page has no token.
def blb_audio_button(html, base)
  # The pattern only accepts 20+ hex characters, so a match is always usable.
  token = html[/data-pronunc="([a-fA-F0-9]{20,})"/i, 1]
  return "" unless token

  src = "#{base}/lang/lexicon/lexPronouncePlayer.cfm?skin=#{token}"
  %(<button onclick="var a=this._a||(this._a=new Audio('#{h(src)}'));a.currentTime=0;a.play();" ) +
    %(style="background:none;border:none;cursor:pointer;padding:0 0 0 4px;font-size:1.1em;vertical-align:middle;" title="Play pronunciation">&#128266;</button>)
end

# Builds the word / transliteration / pronunciation / part-of-speech table.
def blb_info_table(html, base)
  word = blb_plain(html[/<h6[^>]+class="lexTitle(?:Gk|Hb)"[^>]*>(.*?)<\/h6>/im, 1])
  transliteration = (html[/<div[^>]+id="lexTrans".*?<em>(.*?)<\/em>/im, 1] || "").strip
  pronunciation = (html[/class="[^"]*lexicon-pronunc[^"]*"[^>]*>\s*([^\n<]{1,50})/i, 1] || "").strip
  pos = blb_plain(html[/<div[^>]+id="lexPart".*?small-text-right"[^>]*>(.*?)<\/div>/im, 1])

  rows = [
    ["Word", h(word)],
    ["Transliteration", "<em>#{h(transliteration)}</em>"],
    ["Pronunciation", "#{h(pronunciation)}#{blb_audio_button(html, base)}"],
    ["Part of Speech", h(pos)],
  ]
  cells = rows.map { |label, value|
    %(<tr><th class="blb-th">#{h(label)}</th><td>#{value}</td></tr>)
  }.join
  %(<table class="blb-table">#{cells}</table>)
end

# Builds the inflections table (Textus Receptus block only), most frequent
# form first; returns "" when the block is missing or has no usable rows.
def blb_inflections(html, base)
  opening = html.match(/<div\s[^>]*id="greek-tr-inflections"[^>]*>/im)
  return "" unless opening

  rest = opening.post_match
  # The TR block ends where the next inflection block (mGNT / LXX) begins.
  stop = rest.index(/<div\s[^>]*id="greek-(?:mgnt|lxx)-inflections"/i) || rest.length
  forms = []
  rest[0...stop].scan(/<div\s[^>]*class="greekInflection"[^>]*>(.*?)<\/div>\s*<\/div>/im) do |(entry)|
    href = entry[/href="([^"]+)"/i, 1]
    greek = blb_plain(entry[/<span[^>]+class="Gk"[^>]*>(.*?)<\/span>/im, 1])
    freq = (entry[/&#8212;\s*(\d+)x<\/a>/i, 1] || 0).to_i
    next if greek.empty? || freq.zero?

    forms << { word: greek, freq: freq,
               href: href ? base + href.gsub("&amp;", "&") : nil }
  end
  return "" if forms.empty?

  rows = forms.sort_by { |f| -f[:freq] }.map { |f|
    label = h(f[:word])
    link = f[:href] ? %(<a href="#{h(f[:href])}" target="_blank" rel="noopener">#{label}</a>) : label
    %(<tr><td>#{link}</td><td class="blb-right">#{f[:freq]}x</td></tr>)
  }.join
  %(<h4 class="blb-heading">Inflections</h4>) +
    %(<table class="blb-table"><thead><tr><th class="blb-th">Form</th>) +
    %(<th class="blb-th blb-right">Count</th></tr></thead><tbody>#{rows}</tbody></table>)
end

# Builds the "Biblical Usage" outline. Only attribute-free formatting tags
# are kept: script/style elements are removed together with their content,
# and any other element outside the allow-list is dropped (its text stays).
def blb_biblical_usage(html)
  opening = html.match(/<div[^>]+id="outlineBiblical"[^>]*>/im)
  return "" unless opening

  outline = opening.post_match.match(/\A\s*<div>([\s\S]*?)<\/div>\s*<\/div>/im)
  return "" unless outline

  allowed = %w[ol ul li p br div span em i b strong u sup sub small a blockquote]
  body = outline[1].gsub(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/i, "")
  body = body.gsub(/<(\/?)(\w+)[^>]*>/) do
    closing = Regexp.last_match(1)
    tag = Regexp.last_match(2).downcase
    allowed.include?(tag) ? "<#{closing}#{tag}>" : ""
  end
  body = body.gsub(/[ \t]+/, " ").strip
  %(<h4 class="blb-heading">Biblical Usage</h4><div class="blb-usage">#{body}</div>)
end

# Turns one bVerse chunk into { cite:, verse_html:, href: }, or nil when the
# chunk has no citation or no verse text.
def blb_verse(chunk, base)
  cite_href = chunk[/tablet-order-2[^>]*>[\s\S]{0,400}?href="([^"]+)"/im, 1] || ""
  cite = blb_plain(chunk[/tablet-order-2[^>]*>[\s\S]{0,400}?<a[^>]*>(.*?)<\/a>/im, 1])
  return nil if cite.empty?

  criteria = /<sup[^>]*class="[^"]*strongs criteria[^"]*"/i
  text = (chunk[/class="EngBibleText[^"]*"[^>]*>([\s\S]*?)<\/div>/im, 1] || "")
         .delete("\x02\x03") # the control chars are reserved for our own markers
         .gsub(/<img[^>]*>/, "")
         .gsub(/<a[^>]*class="hide-for-tablet"[^>]*>[\s\S]*?<\/a>/im, "")
         .gsub(/<span[^>]*class="hide-for-tablet"[^>]*>[\s\S]*?<\/span>/im, "")

  # Mark the matched word with control-char placeholders so the highlight
  # survives the tag-strip pass below.
  text = text.gsub(/<span\s[^>]*class="word-phrase"[^>]*>([\s\S]*?)<\/span>/im) do
    phrase = Regexp.last_match(1)
    shown = phrase.sub(/<sup[\s\S]*/im, "").gsub(/<[^>]+>/, "").gsub(/&nbsp;/i, " ").strip
    phrase.match?(criteria) ? "\x02#{shown}\x03" : shown
  end
  # Fallback for translations without word-phrase spans (NASB, ESV, etc.):
  # the criteria word sits directly before its <sup class="strongs criteria">.
  unless text.include?("\x02")
    text = text.gsub(/([\w]+[,;:.!?'"]*)\s*<sup[^>]*class="[^"]*strongs criteria[^"]*"[\s\S]*?<\/sup>/im) do
      "\x02#{Regexp.last_match(1)}\x03"
    end
  end

  text = text.gsub(/<sup[^>]*>[\s\S]*?<\/sup>/im, "")
             .gsub(/<[^>]+>/, "")
             .gsub(/&nbsp;/i, " ")
             .gsub(/\s+/, " ")
             .strip
  # Strip the mobile citation prefix ("Mat 5:17 - ") left by hide-for-tablet removal.
  text = text.sub(/\A#{Regexp.escape(cite)}\s*-\s*/i, "")

  # Entities are deliberately NOT decoded: decoding &lt;/&gt; and emitting the
  # result unescaped would turn escaped page text back into live markup.
  verse_html = blb_escape_text(text).gsub(/\x02([^\x03]*)\x03/) do
    %(<span class="blb-match">#{Regexp.last_match(1).strip}</span>)
  end
  return nil if verse_html.empty?

  href = if cite_href.empty?
           nil
         elsif cite_href.start_with?("http")
           cite_href
         else
           base + cite_href
         end
  { cite: cite, verse_html: verse_html, href: href }
end

# Builds the concordance table from every bVerse block; "" when none qualify.
def blb_concordance(html, base)
  trans_name = html[/id="bibleTable"[^>]+data-translation="([^"]+)"/i, 1] || ""
  verses = html.split(/<div\s[^>]*id="bVerse_\d+"[^>]*>/).drop(1)
               .map { |chunk| blb_verse(chunk, base) }.compact
  return "" if verses.empty?

  heading = trans_name.empty? ? "Concordance" : "Concordance (#{h(trans_name)})"
  rows = verses.map { |v|
    label = h(v[:cite])
    link = v[:href] ? %(<a href="#{h(v[:href])}" target="_blank" rel="noopener">#{label}</a>) : label
    %(<tr><td class="blb-nowrap">#{link}</td><td>#{v[:verse_html]}</td></tr>)
  }.join
  %(<h4 class="blb-heading">#{heading}</h4>) +
    %(<table class="blb-table"><tbody>#{rows}</tbody></table>)
end
721
+
583
722
  def compile_regexes(query)
584
723
  words = query.split(/\s+/).reject(&:empty?)
585
724
  return nil if words.empty?
@@ -622,6 +761,98 @@ module MarkdownServer
622
761
  end
623
762
  end
624
763
 
764
# Diagnostic route: fetches ?url=, takes the first bVerse block of the page
# and prints, as plain text, each intermediate stage of the concordance
# highlighting pipeline, followed by what blueletterbible_html produces for
# the same page.
# NOTE(review): no access check is visible in this block, and it fetches
# whatever URL the caller supplies; confirm it is not reachable in production.
get "/debug/raw-fetch" do
  url = params[:url].to_s.strip
  halt 400, "missing ?url=" if url.empty?
  html = fetch_external_page(url)
  halt 502, "fetch failed" unless html
  content_type :text

  # Only the first verse is traced.
  first_verse = html.split(/<div\s[^>]*id="bVerse_\d+"[^>]*>/).drop(1).first
  return "no bVerse chunks found" unless first_verse

  cite_markup = first_verse[/tablet-order-2[^>]*>[\s\S]{0,400}?<a[^>]*>(.*?)<\/a>/im, 1]
  cite = cite_markup ? cite_markup.gsub(/<[^>]+>/, "").strip : "?"
  raw_html = first_verse[/class="EngBibleText[^"]*"[^>]*>([\s\S]*?)<\/div>/im, 1] || "(no EngBibleText found)"

  report = []
  report << "=== cite: #{cite} ==="
  report << "=== EngBibleText raw (#{raw_html.length} chars) ==="
  report << raw_html
  report << ""

  # Stage 1: drop images and the mobile-only duplicates.
  trimmed = raw_html
            .gsub(/<img[^>]*>/, "")
            .gsub(/<a[^>]*class="hide-for-tablet"[^>]*>[\s\S]*?<\/a>/im, "")
            .gsub(/<span[^>]*class="hide-for-tablet"[^>]*>[\s\S]*?<\/span>/im, "")

  word_phrase = /<span\s[^>]*class="word-phrase"[^>]*>([\s\S]*?)<\/span>/im
  criteria_sup = /<sup[^>]*class="[^"]*strongs criteria[^"]*"/i

  phrases = trimmed.scan(word_phrase).map(&:first)
  report << "=== word-phrase matches (#{phrases.length}) ==="
  phrases.each_with_index do |phrase, idx|
    hit = phrase.match?(criteria_sup)
    shown = phrase.sub(/<sup[\s\S]*/im, "").gsub(/<[^>]+>/, "").gsub(/&nbsp;/i, " ").strip
    report << " [#{idx}] criteria=#{hit} word=#{shown.inspect}"
  end

  # Stage 2: replace word-phrase spans, marking the criteria word.
  marked = trimmed.gsub(word_phrase) do
    phrase = Regexp.last_match(1)
    shown = phrase.sub(/<sup[\s\S]*/im, "").gsub(/<[^>]+>/, "").gsub(/&nbsp;/i, " ").strip
    phrase.match?(criteria_sup) ? "\x02#{shown}\x03" : shown
  end
  report << "\n=== after word-phrase gsub (placeholder check) ==="
  report << " contains \\x02: #{marked.include?("\x02")}"
  report << " contains \\x03: #{marked.include?("\x03")}"
  report << " placeholder found: #{marked[/\x02[^\x03]*\x03/].inspect}"

  # Stage 3: strip remaining tags, decode entities, collapse whitespace.
  stripped = marked
             .gsub(/<sup[^>]*>[\s\S]*?<\/sup>/im, "")
             .gsub(/<[^>]+>/, "")
             .gsub(/&nbsp;/i, " ")
             .gsub(/&#(\d+);/) { [Regexp.last_match(1).to_i].pack("U") rescue " " }
             .gsub(/&#x([\da-f]+);/i) { [Regexp.last_match(1).to_i(16)].pack("U") rescue " " }
             .gsub(/&amp;/, "&")
             .gsub(/&lt;/, "<")
             .gsub(/&gt;/, ">")
             .gsub(/\s+/, " ")
             .strip

  report << "=== after tag-strip (placeholder check) ==="
  report << " contains \\x02: #{stripped.include?("\x02")}"
  report << " placeholder found: #{stripped[/\x02[^\x03]*\x03/].inspect}"
  report << " verse_html snippet: #{stripped[0, 200].inspect}"

  # Stage 4: turn the placeholders back into highlight spans.
  restored = stripped.gsub(/\x02([^\x03]*)\x03/) do
    "<span class=\"blb-match\">#{Regexp.last_match(1).strip}</span>"
  end
  report << "\n=== after placeholder restore ==="
  report << " restored snippet: #{restored[0, 300].inspect}"

  # Finally, compare against the real renderer's output.
  full_output = blueletterbible_html(html, url)
  report << "\n=== blueletterbible_html output (blb-match check) ==="
  report << " contains blb-match: #{full_output.include?("blb-match")}"
  report << " blb-match context: #{full_output[/blb-match[^<]*<\/span>/].inspect}"

  # Locate the <h4> that opens the concordance section, if any heading exists.
  conc_start = nil
  if full_output.index("blb-heading")
    conc_start = full_output.rindex("<h4", full_output.index("Concordance") || 0)
  end
  if conc_start
    report << " concordance html (first 500 chars): #{full_output[conc_start, 500].inspect}"
  end

  report.join("\n")
end
844
+
845
# Diagnostic route: fetches ?url= and returns a standalone HTML document with
# the rendered preview. Blue Letter Bible lexicon URLs go through
# blueletterbible_html, everything else through page_html. The popup styles
# are inlined so the page can be viewed outside the application layout.
# NOTE(review): no access check is visible in this block, and it fetches
# whatever URL the caller supplies; confirm it is not reachable in production.
get "/debug/fetch" do
  target = params[:url].to_s.strip
  halt 400, "missing ?url=" if target.empty?

  page = fetch_external_page(target)
  halt 502, "fetch failed" unless page

  content_type :html
  body_markup =
    if target.match?(/blueletterbible\.org\/lexicon\//i)
      blueletterbible_html(page, target)
    else
      page_html(page, target)
    end
  styles = "<style>.blb-table{width:100%;border-collapse:collapse;font-size:.85rem;margin-bottom:.6rem}.blb-table th,.blb-table td{padding:3px 7px;border:1px solid #ddd}.blb-th{text-align:left;font-weight:normal;background:#f5f0e4;color:#555;width:38%}.blb-right{text-align:right}.blb-nowrap{white-space:nowrap;vertical-align:top}.blb-match{color:#b33!important;font-weight:700!important;font-style:italic!important}.blb-heading{font-size:.82rem;font-weight:600;margin:.7rem 0 .25rem;color:#555;text-transform:uppercase;letter-spacing:.04em}.blb-usage{font-size:.85rem}.blb-usage ol{margin:.1rem 0 .1rem 1.3rem;padding:0;list-style-type:decimal}.blb-usage ol ol{list-style-type:lower-alpha}.blb-usage li{margin-bottom:.15rem}</style>"
  "<html><head>#{styles}</head><body style='font-family:sans-serif;max-width:800px;margin:2rem auto;padding:0 1rem'>#{body_markup}</body></html>"
end
855
+
625
856
  get "/download/*" do
626
857
  requested = params["splat"].first.to_s
627
858
  real_path = safe_path(requested)
@@ -680,8 +911,15 @@ module MarkdownServer
680
911
  html = fetch_external_page(url)
681
912
  halt 502, '{"error":"fetch failed"}' unless html
682
913
 
683
- title = page_title(html).sub(/ [-–] .*/, "").strip
684
- JSON.dump({ title: title, html: page_html(html, url) })
914
+ if url.match?(/blueletterbible\.org\/lexicon\//i)
915
+ raw = page_title(html)
916
+ title = raw.match(/^([GH]\d+ - \w+)/i)&.[](1)&.sub(" - ", " – ") ||
917
+ raw.sub(/ [-–] .*/, "").strip
918
+ JSON.dump({ title: title, html: blueletterbible_html(html, url) })
919
+ else
920
+ title = page_title(html).sub(/ [-–] .*/, "").strip
921
+ JSON.dump({ title: title, html: page_html(html, url) })
922
+ end
685
923
  end
686
924
 
687
925
  get "/search/?*" do
@@ -1,3 +1,3 @@
1
1
  module MarkdownServer
2
- VERSION = "0.5.6"
2
+ VERSION = "0.5.8"
3
3
  end
data/views/layout.erb CHANGED
@@ -909,6 +909,22 @@
909
909
  .link-preview-popup th, .link-preview-popup td { border: 1px solid #ddd; padding: 0.3rem 0.5rem; }
910
910
  .link-ctx-popup-body th, .link-preview-popup th { background: #f5f0e4; }
911
911
 
912
+ /* Blue Letter Bible popup tables */
913
+ .blb-table { width: 100%; border-collapse: collapse; font-size: 0.85rem; margin-bottom: 0.6rem; }
914
+ .blb-table th, .blb-table td { padding: 3px 7px; border: 1px solid #ddd; }
915
+ .blb-th { text-align: left; font-weight: normal; background: #f5f0e4; color: #555; width: 38%; }
916
+ .blb-right { text-align: right; }
917
+ .blb-nowrap { white-space: nowrap; vertical-align: top; }
918
+ .blb-match { color: #b33 !important; font-weight: 700 !important; font-style: italic !important; }
919
+ .blb-heading { font-size: 0.82rem; font-weight: 600; margin: 0.7rem 0 0.25rem; color: #555; text-transform: uppercase; letter-spacing: 0.04em; }
920
+ .blb-usage { font-size: 0.85rem; }
921
+ .blb-usage ol { margin: 0.1rem 0 0.1rem 1.3rem; padding: 0; list-style-type: decimal; }
922
+ .blb-usage ol ol { list-style-type: lower-alpha; }
923
+ .blb-usage ol ol ol { list-style-type: lower-roman; }
924
+ .blb-usage ol ol ol ol { list-style-type: lower-alpha; }
925
+ .blb-usage li { margin-bottom: 0.15rem; }
926
+ .blb-usage p { margin: 0; }
927
+
912
928
  /* Footnote tooltips */
913
929
  .footnote-tooltip {
914
930
  position: absolute;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markdownr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.6
4
+ version: 0.5.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Dunn