markdownr 0.5.7 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/markdown_server/app.rb +106 -2
- data/lib/markdown_server/version.rb +1 -1
- data/views/layout.erb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 44c223ecfa0f81c35d2d3d89a5c316eb2adc7f0a343db241185dc828268d3527
|
|
4
|
+
data.tar.gz: 7b87a1fed84d540c89bc92bea8de754eee3d680ba93f952ad4b23c7150aa06b3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8eb259d062a13af47d3126ea380777e01aba1f1e6bc9401fb2f05c8277e9a827d6e16f9299e96768236574114875c0f4d2b5ab273094513c71670afa765f3e12
|
|
7
|
+
data.tar.gz: 56fda7d165fecbec5b46d8a496d257aa38164684779225bfac6e1d9da00df3fb8da3e90fbd4c58c435f2680a21869271f06c4b3791df48b9ac1b6d64e7359277
|
data/lib/markdown_server/app.rb
CHANGED
|
@@ -672,23 +672,35 @@ module MarkdownServer
|
|
|
672
672
|
raw_html.gsub!(/<img[^>]*>/, "")
|
|
673
673
|
raw_html.gsub!(/<a[^>]*class="hide-for-tablet"[^>]*>[\s\S]*?<\/a>/im, "")
|
|
674
674
|
raw_html.gsub!(/<span[^>]*class="hide-for-tablet"[^>]*>[\s\S]*?<\/span>/im, "")
|
|
675
|
+
# Use control-char placeholders so blb-match survives the tag-strip pass
|
|
675
676
|
verse_html = raw_html.gsub(/<span\s[^>]*class="word-phrase"[^>]*>([\s\S]*?)<\/span>/im) do
|
|
676
677
|
inner = $1
|
|
677
678
|
word = inner.sub(/<sup[\s\S]*/im, "").gsub(/<[^>]+>/, "")
|
|
678
679
|
.gsub(/&nbsp;/i, " ").strip
|
|
679
680
|
inner.match?(/<sup[^>]*class="[^"]*strongs criteria[^"]*"/i) ?
|
|
680
|
-
|
|
681
|
+
"\x02#{word}\x03" : word
|
|
682
|
+
end
|
|
683
|
+
# Fallback for translations without word-phrase spans (NASB, ESV, etc.)
|
|
684
|
+
# The criteria word appears directly before its <sup class="strongs criteria"> tag
|
|
685
|
+
unless verse_html.include?("\x02")
|
|
686
|
+
verse_html.gsub!(/([\w]+[,;:.!?'"]*)\s*<sup[^>]*class="[^"]*strongs criteria[^"]*"[\s\S]*?<\/sup>/im) do
|
|
687
|
+
"\x02#{$1}\x03"
|
|
688
|
+
end
|
|
681
689
|
end
|
|
682
690
|
verse_html.gsub!(/<sup[^>]*>[\s\S]*?<\/sup>/im, "")
|
|
683
691
|
verse_html.gsub!(/<[^>]+>/, "")
|
|
684
692
|
verse_html.gsub!(/&nbsp;/i, " ")
|
|
685
693
|
verse_html.gsub!(/&#(\d+);/) { [$1.to_i].pack("U") rescue " " }
|
|
686
694
|
verse_html.gsub!(/&#x([\da-f]+);/i) { [$1.to_i(16)].pack("U") rescue " " }
|
|
687
|
-
verse_html.gsub!(/&amp;/, "&")
|
|
695
|
+
verse_html.gsub!(/&amp;/, "&")
|
|
696
|
+
verse_html.gsub!(/&lt;/, "<")
|
|
697
|
+
verse_html.gsub!(/&gt;/, ">")
|
|
688
698
|
verse_html.gsub!(/\s+/, " ")
|
|
689
699
|
verse_html.strip!
|
|
690
700
|
# Strip the mobile citation prefix ("Mat 5:17 - ") left by hide-for-tablet removal
|
|
691
701
|
verse_html.sub!(/\A#{Regexp.escape(cite)}\s*-\s*/i, "")
|
|
702
|
+
# Restore match placeholders as highlighted spans
|
|
703
|
+
verse_html.gsub!(/\x02([^\x03]*)\x03/) { %(<span class="blb-match">#{h($1.strip)}</span>) }
|
|
692
704
|
|
|
693
705
|
next if cite.empty? || verse_html.empty?
|
|
694
706
|
full_href = cite_href.empty? ? nil : (cite_href.start_with?("http") ? cite_href : base + cite_href)
|
|
@@ -749,6 +761,98 @@ module MarkdownServer
|
|
|
749
761
|
end
|
|
750
762
|
end
|
|
751
763
|
|
|
764
|
+
get "/debug/raw-fetch" do
|
|
765
|
+
url = params[:url].to_s.strip
|
|
766
|
+
halt 400, "missing ?url=" if url.empty?
|
|
767
|
+
html = fetch_external_page(url)
|
|
768
|
+
halt 502, "fetch failed" unless html
|
|
769
|
+
content_type :text
|
|
770
|
+
# Show processing steps for first verse
|
|
771
|
+
chunk = html.split(/<div\s[^>]*id="bVerse_\d+"[^>]*>/).drop(1).first
|
|
772
|
+
return "no bVerse chunks found" unless chunk
|
|
773
|
+
|
|
774
|
+
cite = chunk[/tablet-order-2[^>]*>[\s\S]{0,400}?<a[^>]*>(.*?)<\/a>/im, 1]
|
|
775
|
+
&.gsub(/<[^>]+>/, "")&.strip || "?"
|
|
776
|
+
raw_html = chunk[/class="EngBibleText[^"]*"[^>]*>([\s\S]*?)<\/div>/im, 1] || "(no EngBibleText found)"
|
|
777
|
+
|
|
778
|
+
lines = ["=== cite: #{cite} ===",
|
|
779
|
+
"=== EngBibleText raw (#{raw_html.length} chars) ===",
|
|
780
|
+
raw_html, ""]
|
|
781
|
+
|
|
782
|
+
# Simulate the processing steps
|
|
783
|
+
rh = raw_html.dup
|
|
784
|
+
rh.gsub!(/<img[^>]*>/, "")
|
|
785
|
+
rh.gsub!(/<a[^>]*class="hide-for-tablet"[^>]*>[\s\S]*?<\/a>/im, "")
|
|
786
|
+
rh.gsub!(/<span[^>]*class="hide-for-tablet"[^>]*>[\s\S]*?<\/span>/im, "")
|
|
787
|
+
|
|
788
|
+
wp_matches = rh.scan(/<span\s[^>]*class="word-phrase"[^>]*>([\s\S]*?)<\/span>/im)
|
|
789
|
+
lines << "=== word-phrase matches (#{wp_matches.length}) ==="
|
|
790
|
+
wp_matches.each_with_index do |(inner), i|
|
|
791
|
+
is_criteria = inner.match?(/<sup[^>]*class="[^"]*strongs criteria[^"]*"/i)
|
|
792
|
+
word = inner.sub(/<sup[\s\S]*/im, "").gsub(/<[^>]+>/, "").gsub(/&nbsp;/i, " ").strip
|
|
793
|
+
lines << " [#{i}] criteria=#{is_criteria} word=#{word.inspect}"
|
|
794
|
+
end
|
|
795
|
+
|
|
796
|
+
# Now simulate the full processing pipeline
|
|
797
|
+
verse_html = rh.gsub(/<span\s[^>]*class="word-phrase"[^>]*>([\s\S]*?)<\/span>/im) do
|
|
798
|
+
inner = $1
|
|
799
|
+
word = inner.sub(/<sup[\s\S]*/im, "").gsub(/<[^>]+>/, "").gsub(/&nbsp;/i, " ").strip
|
|
800
|
+
inner.match?(/<sup[^>]*class="[^"]*strongs criteria[^"]*"/i) ? "\x02#{word}\x03" : word
|
|
801
|
+
end
|
|
802
|
+
lines << "\n=== after word-phrase gsub (placeholder check) ==="
|
|
803
|
+
lines << " contains \\x02: #{verse_html.include?("\x02")}"
|
|
804
|
+
lines << " contains \\x03: #{verse_html.include?("\x03")}"
|
|
805
|
+
ph = verse_html[/\x02[^\x03]*\x03/]
|
|
806
|
+
lines << " placeholder found: #{ph.inspect}"
|
|
807
|
+
|
|
808
|
+
verse_html.gsub!(/<sup[^>]*>[\s\S]*?<\/sup>/im, "")
|
|
809
|
+
verse_html.gsub!(/<[^>]+>/, "")
|
|
810
|
+
verse_html.gsub!(/&nbsp;/i, " ")
|
|
811
|
+
verse_html.gsub!(/&#(\d+);/) { [$1.to_i].pack("U") rescue " " }
|
|
812
|
+
verse_html.gsub!(/&#x([\da-f]+);/i) { [$1.to_i(16)].pack("U") rescue " " }
|
|
813
|
+
verse_html.gsub!(/&amp;/, "&")
|
|
814
|
+
verse_html.gsub!(/&lt;/, "<")
|
|
815
|
+
verse_html.gsub!(/&gt;/, ">")
|
|
816
|
+
verse_html.gsub!(/\s+/, " ")
|
|
817
|
+
verse_html.strip!
|
|
818
|
+
|
|
819
|
+
lines << "=== after tag-strip (placeholder check) ==="
|
|
820
|
+
lines << " contains \\x02: #{verse_html.include?("\x02")}"
|
|
821
|
+
ph2 = verse_html[/\x02[^\x03]*\x03/]
|
|
822
|
+
lines << " placeholder found: #{ph2.inspect}"
|
|
823
|
+
lines << " verse_html snippet: #{verse_html[0, 200].inspect}"
|
|
824
|
+
|
|
825
|
+
# Apply the final restore
|
|
826
|
+
restored = verse_html.gsub(/\x02([^\x03]*)\x03/) { "<span class=\"blb-match\">#{$1.strip}</span>" }
|
|
827
|
+
lines << "\n=== after placeholder restore ==="
|
|
828
|
+
lines << " restored snippet: #{restored[0, 300].inspect}"
|
|
829
|
+
|
|
830
|
+
# Now compare with actual blueletterbible_html output
|
|
831
|
+
full_output = blueletterbible_html(html, url)
|
|
832
|
+
conc_match = full_output[/blb-match[^<]*<\/span>/]
|
|
833
|
+
lines << "\n=== blueletterbible_html output (blb-match check) ==="
|
|
834
|
+
lines << " contains blb-match: #{full_output.include?("blb-match")}"
|
|
835
|
+
lines << " blb-match context: #{conc_match.inspect}"
|
|
836
|
+
# Show the concordance section
|
|
837
|
+
conc_start = full_output.index("blb-heading") ? full_output.rindex("<h4", full_output.index("Concordance") || 0) : nil
|
|
838
|
+
if conc_start
|
|
839
|
+
lines << " concordance html (first 500 chars): #{full_output[conc_start, 500].inspect}"
|
|
840
|
+
end
|
|
841
|
+
|
|
842
|
+
lines.join("\n")
|
|
843
|
+
end
|
|
844
|
+
|
|
845
|
+
get "/debug/fetch" do
|
|
846
|
+
url = params[:url].to_s.strip
|
|
847
|
+
halt 400, "missing ?url=" if url.empty?
|
|
848
|
+
html = fetch_external_page(url)
|
|
849
|
+
halt 502, "fetch failed" unless html
|
|
850
|
+
content_type :html
|
|
851
|
+
rendered = url.match?(/blueletterbible\.org\/lexicon\//i) ? blueletterbible_html(html, url) : page_html(html, url)
|
|
852
|
+
blb_css = "<style>.blb-table{width:100%;border-collapse:collapse;font-size:.85rem;margin-bottom:.6rem}.blb-table th,.blb-table td{padding:3px 7px;border:1px solid #ddd}.blb-th{text-align:left;font-weight:normal;background:#f5f0e4;color:#555;width:38%}.blb-right{text-align:right}.blb-nowrap{white-space:nowrap;vertical-align:top}.blb-match{color:#b33!important;font-weight:700!important;font-style:italic!important}.blb-heading{font-size:.82rem;font-weight:600;margin:.7rem 0 .25rem;color:#555;text-transform:uppercase;letter-spacing:.04em}.blb-usage{font-size:.85rem}.blb-usage ol{margin:.1rem 0 .1rem 1.3rem;padding:0;list-style-type:decimal}.blb-usage ol ol{list-style-type:lower-alpha}.blb-usage li{margin-bottom:.15rem}</style>"
|
|
853
|
+
"<html><head>#{blb_css}</head><body style='font-family:sans-serif;max-width:800px;margin:2rem auto;padding:0 1rem'>#{rendered}</body></html>"
|
|
854
|
+
end
|
|
855
|
+
|
|
752
856
|
get "/download/*" do
|
|
753
857
|
requested = params["splat"].first.to_s
|
|
754
858
|
real_path = safe_path(requested)
|
data/views/layout.erb
CHANGED
|
@@ -915,7 +915,7 @@
|
|
|
915
915
|
.blb-th { text-align: left; font-weight: normal; background: #f5f0e4; color: #555; width: 38%; }
|
|
916
916
|
.blb-right { text-align: right; }
|
|
917
917
|
.blb-nowrap { white-space: nowrap; vertical-align: top; }
|
|
918
|
-
.blb-match { color: #b33; font-weight:
|
|
918
|
+
.blb-match { color: #b33 !important; font-weight: 700 !important; font-style: italic !important; }
|
|
919
919
|
.blb-heading { font-size: 0.82rem; font-weight: 600; margin: 0.7rem 0 0.25rem; color: #555; text-transform: uppercase; letter-spacing: 0.04em; }
|
|
920
920
|
.blb-usage { font-size: 0.85rem; }
|
|
921
921
|
.blb-usage ol { margin: 0.1rem 0 0.1rem 1.3rem; padding: 0; list-style-type: decimal; }
|