sisu 7.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/sisu +367 -0
- data/bin/sisugem +367 -0
- data/data/sisu/image/arrow_next_red.png +0 -0
- data/data/sisu/image/arrow_prev_red.png +0 -0
- data/data/sisu/image/arrow_up_red.png +0 -0
- data/data/sisu/image/b_bluebell.png +0 -0
- data/data/sisu/image/b_doc.png +0 -0
- data/data/sisu/image/b_epub.png +0 -0
- data/data/sisu/image/b_home.png +0 -0
- data/data/sisu/image/b_info.png +0 -0
- data/data/sisu/image/b_odf.png +0 -0
- data/data/sisu/image/b_pdf.png +0 -0
- data/data/sisu/image/b_search.png +0 -0
- data/data/sisu/image/b_toc.png +0 -0
- data/data/sisu/image/bullet_08.png +0 -0
- data/data/sisu/image/bullet_09.png +0 -0
- data/data/sisu/image/bullet_10.png +0 -0
- data/data/sisu/image/bullet_11.png +0 -0
- data/data/sisu/image/bullet_12.png +0 -0
- data/data/sisu/image/bullet_doc.png +0 -0
- data/data/sisu/image/bullet_red.png +0 -0
- data/data/sisu/image/dot_clear.png +0 -0
- data/data/sisu/image/dot_white.png +0 -0
- data/data/sisu/image/gplv3.png +0 -0
- data/data/sisu/image/gplv3_free_software.png +0 -0
- data/data/sisu/image/next.png +0 -0
- data/data/sisu/image/previous.png +0 -0
- data/data/sisu/image/rb7.ico +0 -0
- data/data/sisu/image/sisu.png +0 -0
- data/data/sisu/image/toctoc.png +0 -0
- data/data/sisu/version.yml +5 -0
- data/lib/sisu.rb +94 -0
- data/lib/sisu/air.rb +80 -0
- data/lib/sisu/ao.rb +590 -0
- data/lib/sisu/ao_character_check.rb +102 -0
- data/lib/sisu/ao_composite.rb +286 -0
- data/lib/sisu/ao_doc_objects.rb +565 -0
- data/lib/sisu/ao_doc_str.rb +2269 -0
- data/lib/sisu/ao_endnotes.rb +136 -0
- data/lib/sisu/ao_expand_insertions.rb +514 -0
- data/lib/sisu/ao_hash_digest.rb +174 -0
- data/lib/sisu/ao_idx.rb +422 -0
- data/lib/sisu/ao_images.rb +187 -0
- data/lib/sisu/ao_metadata.rb +86 -0
- data/lib/sisu/ao_misc_arrange.rb +207 -0
- data/lib/sisu/ao_numbering.rb +720 -0
- data/lib/sisu/ao_persist.rb +194 -0
- data/lib/sisu/ao_references.rb +502 -0
- data/lib/sisu/ao_syntax.rb +640 -0
- data/lib/sisu/cgi.rb +84 -0
- data/lib/sisu/cgi_pgsql.rb +270 -0
- data/lib/sisu/cgi_sql_common.rb +986 -0
- data/lib/sisu/cgi_sqlite.rb +244 -0
- data/lib/sisu/conf.rb +287 -0
- data/lib/sisu/constants.rb +388 -0
- data/lib/sisu/css.rb +3484 -0
- data/lib/sisu/db_columns.rb +1997 -0
- data/lib/sisu/db_create.rb +689 -0
- data/lib/sisu/db_dbi.rb +90 -0
- data/lib/sisu/db_drop.rb +207 -0
- data/lib/sisu/db_import.rb +877 -0
- data/lib/sisu/db_indexes.rb +146 -0
- data/lib/sisu/db_load_tuple.rb +323 -0
- data/lib/sisu/db_remove.rb +182 -0
- data/lib/sisu/db_select.rb +230 -0
- data/lib/sisu/db_sqltxt.rb +173 -0
- data/lib/sisu/db_tests.rb +114 -0
- data/lib/sisu/dbi.rb +166 -0
- data/lib/sisu/dbi_discrete.rb +206 -0
- data/lib/sisu/digests.rb +306 -0
- data/lib/sisu/dp.rb +1606 -0
- data/lib/sisu/dp_identify_markup.rb +161 -0
- data/lib/sisu/dp_make.rb +668 -0
- data/lib/sisu/embedded.rb +149 -0
- data/lib/sisu/errors.rb +84 -0
- data/lib/sisu/generic_parts.rb +131 -0
- data/lib/sisu/git.rb +277 -0
- data/lib/sisu/html.rb +775 -0
- data/lib/sisu/html_concordance.rb +391 -0
- data/lib/sisu/html_format.rb +1348 -0
- data/lib/sisu/html_harvest.rb +109 -0
- data/lib/sisu/html_harvest_author_format.rb +111 -0
- data/lib/sisu/html_harvest_authors.rb +466 -0
- data/lib/sisu/html_harvest_topics.rb +893 -0
- data/lib/sisu/html_lite_shared.rb +324 -0
- data/lib/sisu/html_manifest.rb +1032 -0
- data/lib/sisu/html_minitoc.rb +230 -0
- data/lib/sisu/html_parts.rb +437 -0
- data/lib/sisu/html_persist.rb +237 -0
- data/lib/sisu/html_promo.rb +440 -0
- data/lib/sisu/html_scroll.rb +235 -0
- data/lib/sisu/html_segments.rb +716 -0
- data/lib/sisu/html_shared.rb +62 -0
- data/lib/sisu/html_table.rb +64 -0
- data/lib/sisu/html_tune.rb +301 -0
- data/lib/sisu/hub.rb +277 -0
- data/lib/sisu/hub_actions.rb +1122 -0
- data/lib/sisu/hub_loop_markup_files.rb +170 -0
- data/lib/sisu/hub_options.rb +1695 -0
- data/lib/sisu/i18n.rb +702 -0
- data/lib/sisu/manpage.rb +377 -0
- data/lib/sisu/manpage_format.rb +85 -0
- data/lib/sisu/object_munge.rb +307 -0
- data/lib/sisu/prog_text_translation.rb +1702 -0
- data/lib/sisu/qrcode.rb +754 -0
- data/lib/sisu/relaxng.rb +1153 -0
- data/lib/sisu/remote.rb +246 -0
- data/lib/sisu/rexml.rb +148 -0
- data/lib/sisu/se.rb +158 -0
- data/lib/sisu/se_cleanoutput.rb +145 -0
- data/lib/sisu/se_clear.rb +105 -0
- data/lib/sisu/se_createsite.rb +273 -0
- data/lib/sisu/se_css.rb +221 -0
- data/lib/sisu/se_date.rb +92 -0
- data/lib/sisu/se_db.rb +214 -0
- data/lib/sisu/se_envcall.rb +326 -0
- data/lib/sisu/se_file_op.rb +2758 -0
- data/lib/sisu/se_filemap.rb +247 -0
- data/lib/sisu/se_get_init.rb +238 -0
- data/lib/sisu/se_hub_particulars.rb +234 -0
- data/lib/sisu/se_info_env.rb +2179 -0
- data/lib/sisu/se_info_port.rb +70 -0
- data/lib/sisu/se_info_system.rb +202 -0
- data/lib/sisu/se_load.rb +108 -0
- data/lib/sisu/se_processing.rb +659 -0
- data/lib/sisu/se_programs.rb +394 -0
- data/lib/sisu/se_remotes.rb +553 -0
- data/lib/sisu/se_standardise_lang.rb +176 -0
- data/lib/sisu/se_version.rb +174 -0
- data/lib/sisu/shared_images.rb +137 -0
- data/lib/sisu/shared_markup_alt.rb +336 -0
- data/lib/sisu/shared_metadata.rb +1361 -0
- data/lib/sisu/shared_sem.rb +156 -0
- data/lib/sisu/sisu_thor_lib.rb +407 -0
- data/lib/sisu/sitemaps.rb +224 -0
- data/lib/sisu/src_kdissert_share.rb +102 -0
- data/lib/sisu/src_po4a_share.rb +309 -0
- data/lib/sisu/src_po4a_shelf.rb +1217 -0
- data/lib/sisu/src_po4a_shelf_set.rb +297 -0
- data/lib/sisu/src_po4a_sst_ao_sst.rb +893 -0
- data/lib/sisu/src_po4a_sst_ao_sst_set.rb +284 -0
- data/lib/sisu/src_po4a_sstm.rb +135 -0
- data/lib/sisu/src_shared.rb +347 -0
- data/lib/sisu/src_sisupod_make.rb +171 -0
- data/lib/sisu/src_sisupod_sstm.rb +109 -0
- data/lib/sisu/sst_convert_markup.rb +323 -0
- data/lib/sisu/sst_do_inline_footnotes.rb +440 -0
- data/lib/sisu/sst_from_xml.rb +178 -0
- data/lib/sisu/sst_identify_markup.rb +482 -0
- data/lib/sisu/sst_to_s_xml_sax.rb +471 -0
- data/lib/sisu/termsheet.rb +163 -0
- data/lib/sisu/texinfo.rb +430 -0
- data/lib/sisu/texinfo_format.rb +541 -0
- data/lib/sisu/texpdf.rb +1162 -0
- data/lib/sisu/texpdf_format.rb +1689 -0
- data/lib/sisu/texpdf_parts.rb +235 -0
- data/lib/sisu/txt_asciidoc.rb +354 -0
- data/lib/sisu/txt_asciidoc_decorate.rb +207 -0
- data/lib/sisu/txt_markdown.rb +389 -0
- data/lib/sisu/txt_markdown_decorate.rb +207 -0
- data/lib/sisu/txt_orgmode.rb +376 -0
- data/lib/sisu/txt_orgmode_decorate.rb +186 -0
- data/lib/sisu/txt_output.rb +86 -0
- data/lib/sisu/txt_plain.rb +410 -0
- data/lib/sisu/txt_plain_decorate.rb +189 -0
- data/lib/sisu/txt_read.rb +109 -0
- data/lib/sisu/txt_rst.rb +371 -0
- data/lib/sisu/txt_rst_decorate.rb +186 -0
- data/lib/sisu/txt_shared.rb +241 -0
- data/lib/sisu/txt_textile.rb +367 -0
- data/lib/sisu/txt_textile_decorate.rb +186 -0
- data/lib/sisu/update.rb +141 -0
- data/lib/sisu/urls.rb +696 -0
- data/lib/sisu/utils.rb +232 -0
- data/lib/sisu/utils_composite.rb +115 -0
- data/lib/sisu/utils_response.rb +114 -0
- data/lib/sisu/utils_screen_text_color.rb +472 -0
- data/lib/sisu/utils_spell.rb +99 -0
- data/lib/sisu/webrick.rb +191 -0
- data/lib/sisu/wikispeak.rb +375 -0
- data/lib/sisu/xhtml.rb +472 -0
- data/lib/sisu/xhtml_epub2.rb +890 -0
- data/lib/sisu/xhtml_epub2_concordance.rb +322 -0
- data/lib/sisu/xhtml_epub2_format.rb +2272 -0
- data/lib/sisu/xhtml_epub2_persist.rb +278 -0
- data/lib/sisu/xhtml_epub2_segments.rb +599 -0
- data/lib/sisu/xhtml_epub2_tune.rb +330 -0
- data/lib/sisu/xhtml_parts.rb +183 -0
- data/lib/sisu/xhtml_shared.rb +62 -0
- data/lib/sisu/xhtml_table.rb +97 -0
- data/lib/sisu/xml_docbook5.rb +376 -0
- data/lib/sisu/xml_dom.rb +624 -0
- data/lib/sisu/xml_fictionbook2.rb +389 -0
- data/lib/sisu/xml_format.rb +865 -0
- data/lib/sisu/xml_md_oai_pmh_dc.rb +229 -0
- data/lib/sisu/xml_odf_odt.rb +887 -0
- data/lib/sisu/xml_odf_odt_format.rb +674 -0
- data/lib/sisu/xml_parts.rb +191 -0
- data/lib/sisu/xml_persist.rb +126 -0
- data/lib/sisu/xml_sax.rb +521 -0
- data/lib/sisu/xml_scaffold_structure_collapsed.rb +198 -0
- data/lib/sisu/xml_scaffold_structure_sisu.rb +201 -0
- data/lib/sisu/xml_shared.rb +665 -0
- data/lib/sisu/xml_tables.rb +261 -0
- data/lib/sisu/zap.rb +90 -0
- metadata +251 -0
@@ -0,0 +1,109 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
=begin
|
3
|
+
|
4
|
+
* Name: SiSU
|
5
|
+
|
6
|
+
** Description: documents, structuring, processing, publishing, search
|
7
|
+
*** metadata harvest, harvest metadata from document corpus
|
8
|
+
|
9
|
+
** Author: Ralph Amissah
|
10
|
+
<ralph@amissah.com>
|
11
|
+
<ralph.amissah@gmail.com>
|
12
|
+
|
13
|
+
** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
|
14
|
+
2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
|
15
|
+
All Rights Reserved.
|
16
|
+
|
17
|
+
** License: GPL 3 or later:
|
18
|
+
|
19
|
+
SiSU, a framework for document structuring, publishing and search
|
20
|
+
|
21
|
+
Copyright (C) Ralph Amissah
|
22
|
+
|
23
|
+
This program is free software: you can redistribute it and/or modify it
|
24
|
+
under the terms of the GNU General Public License as published by the Free
|
25
|
+
Software Foundation, either version 3 of the License, or (at your option)
|
26
|
+
any later version.
|
27
|
+
|
28
|
+
This program is distributed in the hope that it will be useful, but WITHOUT
|
29
|
+
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
30
|
+
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
31
|
+
more details.
|
32
|
+
|
33
|
+
You should have received a copy of the GNU General Public License along with
|
34
|
+
this program. If not, see <http://www.gnu.org/licenses/>.
|
35
|
+
|
36
|
+
If you have Internet connection, the latest version of the GPL should be
|
37
|
+
available at these locations:
|
38
|
+
<http://www.fsf.org/licensing/licenses/gpl.html>
|
39
|
+
<http://www.gnu.org/licenses/gpl.html>
|
40
|
+
|
41
|
+
<http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
|
42
|
+
|
43
|
+
** SiSU uses:
|
44
|
+
* Standard SiSU markup syntax,
|
45
|
+
* Standard SiSU meta-markup syntax, and the
|
46
|
+
* Standard SiSU object citation numbering and system
|
47
|
+
|
48
|
+
** Homepages:
|
49
|
+
<http://www.jus.uio.no/sisu>
|
50
|
+
<http://www.sisudoc.org>
|
51
|
+
|
52
|
+
** Git
|
53
|
+
<http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
|
54
|
+
<http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/harvest.rb;hb=HEAD>
|
55
|
+
|
56
|
+
=end
|
57
|
+
# Entry point for the metadata harvest: dispatches on the command-line
# selection, runs the author and topic harvesters and, when requested,
# pushes the result to a remote host with rsync.
module SiSU_Harvest
  @@the_idx_topics,@@the_idx_authors={},{}
  class Source
    require_relative 'hub_options'                         # hub_options.rb
    require_relative 'html_harvest_topics'                 # html_harvest_topics.rb
    require_relative 'html_harvest_authors'                # html_harvest_authors.rb
    require_relative 'se'                                  # se.rb
    include SiSU_Env
    # opt:: parsed command-line options object (project type); must respond
    #       to base_stub, selections and act as used below.
    def initialize(opt)
      @opt=opt
      @env=SiSU_Env::InfoEnv.new
    end
    # Ensures the harvest output directory exists, runs the selected harvest
    # and always copies the site css afterwards (even if the harvest failed).
    # Failures are still non-fatal, but are now reported on stderr instead of
    # being discarded without trace.
    def read
      begin
        harvest_pth=@env.path.webserv + '/' + @opt.base_stub
        FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth)
        cases(@opt,@env)
      rescue StandardError => e
        warn "harvest: #{e.class}: #{e.message}"
      ensure
        SiSU_Env::CreateSite.new(@opt).cp_css
      end
    end
    # Prints the one-line usage hint for the harvest command.
    def help
      puts <<WOK
harvest --harvest extracts document index metadata

WOK
    end
    # Writes the harvest stylesheet into the current working directory.
    # opt:: accepted for interface compatibility; not used here.
    def css(opt)
      require_relative 'css'                               # css.rb
      css=SiSU_Style::CSS.new
      fn_css=SiSU_Env::CSS_Default.new
      # Block form closes the file even when the write raises.
      File.open("#{@env.path.pwd}/#{fn_css.harvest}",'w') do |style|
        style << css.harvest
      end
    end
    # Runs the author and topic harvests when --harvest was selected,
    # otherwise prints the usage hint.
    # opt:: options object; env:: SiSU_Env::InfoEnv instance.
    def cases(opt,env)
      case opt.selections.str.inspect
      when /--harvest/i
        css(opt) if @opt.act[:maintenance][:set]==:on
        SiSU_HarvestAuthors::Songsheet.new(opt,env).songsheet
        SiSU_HarvestTopics::Songsheet.new(opt,env).songsheet
        if @opt.act[:rsync][:set]==:on
          require_relative 'remote'                        # remote.rb
          SiSU_Remote::Put.new(opt).rsync_harvest
        end
      else
        help
      end
    end
  end
end
|
109
|
+
__END__
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
=begin
|
3
|
+
|
4
|
+
* Name: SiSU
|
5
|
+
|
6
|
+
** Description: documents, structuring, processing, publishing, search
|
7
|
+
*** system environment, resource control and configuration details
|
8
|
+
|
9
|
+
** Author: Ralph Amissah
|
10
|
+
<ralph@amissah.com>
|
11
|
+
<ralph.amissah@gmail.com>
|
12
|
+
|
13
|
+
** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
|
14
|
+
2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
|
15
|
+
All Rights Reserved.
|
16
|
+
|
17
|
+
** License: GPL 3 or later:
|
18
|
+
|
19
|
+
SiSU, a framework for document structuring, publishing and search
|
20
|
+
|
21
|
+
Copyright (C) Ralph Amissah
|
22
|
+
|
23
|
+
This program is free software: you can redistribute it and/or modify it
|
24
|
+
under the terms of the GNU General Public License as published by the Free
|
25
|
+
Software Foundation, either version 3 of the License, or (at your option)
|
26
|
+
any later version.
|
27
|
+
|
28
|
+
This program is distributed in the hope that it will be useful, but WITHOUT
|
29
|
+
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
30
|
+
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
31
|
+
more details.
|
32
|
+
|
33
|
+
You should have received a copy of the GNU General Public License along with
|
34
|
+
this program. If not, see <http://www.gnu.org/licenses/>.
|
35
|
+
|
36
|
+
If you have Internet connection, the latest version of the GPL should be
|
37
|
+
available at these locations:
|
38
|
+
<http://www.fsf.org/licensing/licenses/gpl.html>
|
39
|
+
<http://www.gnu.org/licenses/gpl.html>
|
40
|
+
|
41
|
+
<http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
|
42
|
+
|
43
|
+
** SiSU uses:
|
44
|
+
* Standard SiSU markup syntax,
|
45
|
+
* Standard SiSU meta-markup syntax, and the
|
46
|
+
* Standard SiSU object citation numbering and system
|
47
|
+
|
48
|
+
** Homepages:
|
49
|
+
<http://www.jus.uio.no/sisu>
|
50
|
+
<http://www.sisudoc.org>
|
51
|
+
|
52
|
+
** Git
|
53
|
+
<http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
|
54
|
+
<http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/harvest_author_format.rb;hb=HEAD>
|
55
|
+
|
56
|
+
=end
|
57
|
+
# Parses a document's creator/author header value into structured author
# details and display strings.
module SiSU_FormatAuthor
  class Author
    # author_param:: raw author string; authors are separated by ';' and
    #                each author is either "Last, First" or a double-quoted
    #                name that is taken verbatim.
    def initialize(author_param)
      @author_param=author_param
    end
    # Returns a Hash with:
    # last_first_a::        the raw ';'-separated segments as scanned
    # last_first_format_a:: one "LAST, First" (or "NAME") string per author
    # authors_h::           one { the: last, others: first } hash per author
    #                       (:others is absent for single-part names)
    # authors_s::           "First Last, First Last" display string
    # authors_param::       the original parameter
    #
    # Segments that are blank, or that have more than two comma-separated
    # parts, contribute no author (they are skipped rather than raising).
    def author_details
      @authors,@author_array=[],[]
      authors=@author_param.scan(/[^;]+/)
      authors.each do |segment|
        segment=segment.strip
        quoted=segment[/"(.+?)"/,1]
        if quoted
          @authors << { the: quoted }
          @author_array << quoted.upcase
          next
        end
        last,first,*extra=segment.scan(/[^,]+/).map(&:strip).reject(&:empty?)
        next if last.nil? || !extra.empty?
        if first
          @authors << { the: last, others: first }
          @author_array << "#{last.upcase}, #{first}"
        else
          @authors << { the: last }
          @author_array << last.upcase
        end
      end
      # Every author is followed by ', ' except the last one, whether or not
      # the author has a forename part.
      authors_string=@authors.map do |a|
        a[:others] ? "#{a[:others]} #{a[:the]}" : "#{a[:the]}"
      end.join(', ')
      {
        last_first_a:        authors,
        last_first_format_a: @author_array,
        authors_h:           @authors,
        authors_s:           authors_string,
        authors_param:       @author_param
      }
    end
  end
end
|
111
|
+
__END__
|
@@ -0,0 +1,466 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
=begin
|
3
|
+
|
4
|
+
* Name: SiSU
|
5
|
+
|
6
|
+
** Description: documents, structuring, processing, publishing, search
|
7
|
+
*** metadata harvest, extract authors and their writings from document set
|
8
|
+
|
9
|
+
** Author: Ralph Amissah
|
10
|
+
<ralph@amissah.com>
|
11
|
+
<ralph.amissah@gmail.com>
|
12
|
+
|
13
|
+
** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
|
14
|
+
2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
|
15
|
+
All Rights Reserved.
|
16
|
+
|
17
|
+
** License: GPL 3 or later:
|
18
|
+
|
19
|
+
SiSU, a framework for document structuring, publishing and search
|
20
|
+
|
21
|
+
Copyright (C) Ralph Amissah
|
22
|
+
|
23
|
+
This program is free software: you can redistribute it and/or modify it
|
24
|
+
under the terms of the GNU General Public License as published by the Free
|
25
|
+
Software Foundation, either version 3 of the License, or (at your option)
|
26
|
+
any later version.
|
27
|
+
|
28
|
+
This program is distributed in the hope that it will be useful, but WITHOUT
|
29
|
+
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
30
|
+
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
31
|
+
more details.
|
32
|
+
|
33
|
+
You should have received a copy of the GNU General Public License along with
|
34
|
+
this program. If not, see <http://www.gnu.org/licenses/>.
|
35
|
+
|
36
|
+
If you have Internet connection, the latest version of the GPL should be
|
37
|
+
available at these locations:
|
38
|
+
<http://www.fsf.org/licensing/licenses/gpl.html>
|
39
|
+
<http://www.gnu.org/licenses/gpl.html>
|
40
|
+
|
41
|
+
<http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
|
42
|
+
|
43
|
+
** SiSU uses:
|
44
|
+
* Standard SiSU markup syntax,
|
45
|
+
* Standard SiSU meta-markup syntax, and the
|
46
|
+
* Standard SiSU object citation numbering and system
|
47
|
+
|
48
|
+
** Homepages:
|
49
|
+
<http://www.jus.uio.no/sisu>
|
50
|
+
<http://www.sisudoc.org>
|
51
|
+
|
52
|
+
** Git
|
53
|
+
<http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
|
54
|
+
<http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/harvest_authors.rb;hb=HEAD>
|
55
|
+
|
56
|
+
=end
|
57
|
+
module SiSU_HarvestAuthors
|
58
|
+
require_relative 'html_harvest_author_format' # html_harvest_author_format.rb
|
59
|
+
require_relative 'html_parts' # html_parts.rb
|
60
|
+
# Drives the author harvest: collects the title/creator/date header
# paragraphs of every source file, turns them into per-language index
# entries, builds the author index and writes it out as html.
class Songsheet
  @@the_idx_authors={}
  # opt:: options object (must respond to files and f_pths)
  # env:: environment object handed on to Harvest
  def initialize(opt,env)
    @opt,@env=opt,env
    @file_list=opt.files
  end
  # Runs the whole author harvest for the files listed in @opt.f_pths.
  def songsheet
    idx_array={}
    @opt.f_pths.each do |src|
      headers_by_lang={}
      name=src[:f]
      filename=src[:pth] + '/' + src[:f]
      File.open(filename,'r') do |file|
        # Read blank-line separated paragraphs; stop at the first paragraph
        # that is neither a header field, blank, nor a '%' comment.
        file.each_line("\n\n") do |para|
          case para
          when /^@(?:title|creator|date):(?:\s|$)/m
            (headers_by_lang[src[:lng_is]] ||= []) << para
          when /^@\S+?:(?:\s|$)/m, /^(?:\s*\n|%+ )/
            next                                           # other header field, blank or comment: skip
          else
            break
          end
        end
      end
      headers_by_lang.each_pair do |lang,paras|
        idx_array[lang] ||= []
        harvest=SiSU_HarvestAuthors::Harvest.new(
          @opt,@env,paras,filename,name,idx_array,lang
        )
        idx_array=harvest.extract_harvest
      end
    end
    indexer=SiSU_HarvestAuthors::Index.new(idx_array,@@the_idx_authors)
    the_idx=indexer.construct_book_author_index
    printer=SiSU_HarvestAuthors::OutputIndex.new(@opt,the_idx)
    printer.html_print.html_songsheet
  end
end
|
106
|
+
# Extracts title, subtitle, author and year from one document's header
# paragraphs and appends a corresponding entry to the per-language index.
class Harvest
  # Patterns for the header fields of interest; capture group 1 is the value.
  HEADER_RGX={
    author:   /^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m,
    title:    /^@title:[ ]+(.+)/,
    subtitle: /^@title:.+?:subtitle:[ ]+(.+?)\n/m,
    date:     /^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m
  }.freeze
  # opt::       options object (stored, not used in this class)
  # env::       must respond to output_dir_structure.by?
  # data::      Array of header paragraph Strings
  # filename::  path of the source file
  # name::      bare file name of the source file
  # idx_array:: Hash of language code => Array of entries; updated and returned
  # lang::      language code under which the entry is filed
  def initialize(opt,env,data,filename,name,idx_array,lang)
    @opt, @env,@data,@filename,@name,@idx_array,@lang=
      opt,env, data, filename, name, idx_array, lang
  end
  # Returns idx_array. An entry is appended to idx_array[lang] only when
  # both a title and an author were found; otherwise the list is left as is
  # (and created empty if it did not exist yet).
  def extract_harvest
    idx_array,lang,name=@idx_array,@lang,@name
    @title=@subtitle=@fulltitle=@author=@author_format=@date=nil
    @authors=[]
    @data.each do |para|
      @title=para[HEADER_RGX[:title],1] || @title
      @subtitle=para[HEADER_RGX[:subtitle],1] || @subtitle
      @author_format=para[HEADER_RGX[:author],1] || @author_format
      @date=para[HEADER_RGX[:date],1] || @date
      # Stop once every field is known (the author lives in @author_format).
      break if @title && @subtitle && @author_format && @date
    end
    # A subtitle without a title yields no full title instead of raising.
    @fulltitle=(@title && @subtitle) ? "#{@title} - #{@subtitle}" : @title
    entries=(idx_array[lang] ||= [])
    if @title && @author_format
      creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details
      @authors,@authorship=creator[:authors_h],creator[:authors_s]
      # Strip the markup suffix and an optional "~lang" code before it.
      file=name.sub(/(?:~[a-z]{2,3})?\.ss[mt]$/,'')
      page=if @env.output_dir_structure.by? == :language
        "#{lang}/sisu_manifest.html"
      else
        "sisu_manifest.#{lang}.html"
      end
      entries << {
        filename: @filename,
        file:     file,
        date:     @date,
        title:    @fulltitle,
        author:   creator,
        page:     page,
        lang:     lang
      }
    end
    idx_array[lang]=entries.flatten
    idx_array
  end
end
|
169
|
+
# Builds the author index: language => author => { md: [works] }.
class Index
  # idx_array:: Hash of language code => Array of entries as produced by
  #             Harvest#extract_harvest
  # the_idx::   Hash that receives the index; it is updated in place, so a
  #             caller may pass the same hash again to accumulate results
  #
  # The target is kept per instance (previously a class variable that every
  # new instance overwrote, so two live instances shared one target).
  def initialize(idx_array,the_idx)
    @idx_array,@the_idx=idx_array,the_idx
  end
  # Returns txt with its first character capitalised; nil and '' are
  # returned unchanged.
  def capital(txt)
    return txt if txt.nil? || txt.empty?
    txt[0].capitalize + txt[1..-1]
  end
  # Files every entry under each of its authors (key: the stripped
  # "LAST, First" string from entry[:author][:last_first_format_a]) and
  # returns the updated index hash.
  def construct_book_author_index
    @idx_array.each_pair do |lang,entries|
      by_author=(@the_idx[lang] ||= {})
      entries.each do |idx|
        idx[:author][:last_first_format_a].each do |author|
          record=(by_author[author.strip] ||= { md: [] })
          record[:md] << {
            filename: idx[:filename],
            file:     idx[:file],
            author:   idx[:author],
            title:    idx[:title],
            date:     idx[:date],
            page:     idx[:page],
            lang:     idx[:lang]
          }
        end
      end
    end
    @the_idx
  end
end
|
202
|
+
# Writes the author index as one html page per language, with an optional
# second "maintenance" stream (@output[lng][:html_mnt]) that is written to
# only when it holds an open File.
#
# Typical use: OutputIndex.new(opt,the_idx).html_print.html_songsheet
class OutputIndex
  require_relative 'i18n'                                  # i18n.rb
  # opt::     options object (base_stub, act, files are read)
  # the_idx:: Hash of language => author => { md: [works] }
  def initialize(opt,the_idx)
    @opt,@the_idx=opt,the_idx
    @env=SiSU_Env::InfoEnv.new
    @rc=SiSU_Env::GetInit.new.sisu_yaml.rc
    @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
    @alph=@alphabet_list.dup
    @letter=@alph.shift
  end
  # Creates the output directory for each language and opens its authors
  # page for writing; reports the harvest on screen unless quiet is set.
  def html_file_open
    @the_idx.keys.each do |lng|
      @output ||={}
      @output[lng] ||={}
      harvest_pth,file='',''
      case @env.output_dir_structure.by?
      when :language
        harvest_pth=@env.path.webserv + '/' + @opt.base_stub + '/' + lng + '/' + 'manifest'
        file="#{harvest_pth}/authors.html"
      when :filetype
        harvest_pth=@env.path.webserv + '/' + @opt.base_stub + '/' + 'manifest'
        file="#{harvest_pth}/authors.#{lng}.html"
      when :filename
        harvest_pth=@env.path.webserv + '/' + @opt.base_stub
        file="#{harvest_pth}/authors.#{lng}.html"
      end
      FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth)
      show_file=[:verbose,:verbose_plus,:urls_selected,:maintenance].any? do |flag|
        @opt.act[flag][:set]==:on
      end
      fileinfo=show_file ? "file://#{file}" : ''
      unless @opt.act[:quiet][:set]==:on
        SiSU_Screen::Ansi.new(
          @opt.act[:color_state][:set],
          "harvest authors (#{@opt.files.length} files)",
          fileinfo
        ).dark_grey_title_hi
      end
      @output[lng][:html]=File.new(file,'w')
    end
  end
  # Closes every output stream that is still open. Safe to call when
  # opening failed part-way or nothing was opened.
  def html_file_close
    (@output || {}).each_value do |streams|
      streams.each_value do |stream|
        stream.close if stream.is_a?(File) && !stream.closed?
      end
    end
  end
  # Selects html output; returns self so an action can be chained.
  # (The html methods are now ordinary instance methods rather than being
  # re-defined on every call; the chained call style still works.)
  def html_print
    self
  end
  # Produces the complete authors page for every language. The output
  # files are closed even if rendering raises.
  def html_songsheet
    html_file_open
    html_head
    html_alph
    html_body
    html_tail
  ensure
    html_file_close
  end
  # Returns the html page head for language lng. type 'maintenance' selects
  # a stylesheet path local to the page.
  def html_head_adjust(lng,type='')
    by=@env.output_dir_structure.by?
    css_public,topics,home_pth,output_structure_by=case by
    when :language
      ['../../_sisu/css/harvest.css','topics.html','../..',
        '(output organised by language & filetype)']
    when :filetype
      ['../_sisu/css/harvest.css',"topics.#{lng}.html",'..',
        '(output organised by filetype)']
    when :filename
      ['./_sisu/css/harvest.css',"topics.#{lng}.html",'.',
        '(output organised by filename)']
    else
      [nil,'','.','(output organised by ?)']
    end
    css_path=if css_public.nil? then ''
    elsif type =~/maintenance/ then 'harvest.css'
    else css_public
    end
    ln=SiSU_i18n::Languages.new.language.list
    # One link per harvested language, pointing at that language's page.
    harvest_languages=@the_idx.keys.map do |lg|
      file=case by
      when :language            then "../../#{lg}/manifest/authors.html"
      when :filetype,:filename  then "./authors.#{lg}.html"
      end
      %{<a href="#{file}">#{ln[lg][:t]}</a> }
    end.join
    sv=SiSU_Env::InfoVersion.instance.get_version
    # NOTE(review): the shortcut icon path below is the same for every
    # directory structure, unlike css_path - confirm this is intended.
    <<WOK
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>SiSU Metadata Harvest - Authors</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="dc.title" content= "SiSU metadata harvest, Authors - SiSU information Structuring Universe, Structured information Serialised Units" />
<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" />
<meta name="generator" content="#{sv.project} #{sv.version} of #{sv.date_stamp} (n*x and Ruby!)" />
<link rel="generator" href="http://www.jus.uio.no/sisu/SiSU" />
<link href="#{css_path}" rel="stylesheet" >
<link rel="shortcut icon" href="../_sisu/image/rb7.ico" />
</head>
<body lang="en" xml:lang="en">
<a name="top" id="top"></a>
<a name="up" id="up"></a>
<a name="start" id="start"></a>
<h1>SiSU Metadata Harvest - Authors #{output_structure_by}</h1>
<p>[<a href="#{home_pth}/index.html"> HOME </a>] also see <a href="#{topics}">SiSU Metadata Harvest - Topics</a></p>
<p>#{@env.widget_static.search_form}</p>
<hr />
<p class="tiny">#{harvest_languages}</p>
<hr />
WOK
  end
  # Writes the page head to each language's output. The maintenance stream
  # is written only when it is an open File: testing the option flag alone
  # raised NoMethodError, because nothing in this class opens that stream.
  def html_head
    @the_idx.keys.each do |lng|
      mnt=@output[lng][:html_mnt]
      mnt << html_head_adjust(lng,'maintenance') if mnt.is_a?(File)
      @output[lng][:html] << html_head_adjust(lng)
    end
  end
  # Writes the row of letter links (entries of @alph containing a digit
  # produce no link).
  def html_alph
    links=@alph.map do |x|
      (x =~/[0-9]/) ? '' : %{<a href="##{x}">#{x}</a>, }
    end
    a='<p>' + links.join
    @the_idx.keys.each { |lng| do_html(lng,a) }
  end
  # Writes the page footer and closing tags.
  def html_tail
    a =<<WOK
<hr />
<a name="bottom" id="bottom"></a>
<a name="down" id="down"></a>
<a name="end" id="end"></a>
<a name="finish" id="finish"></a>
<a name="stop" id="stop"></a>
<a name="credits"></a>
#{SiSU_Proj_HTML::Bits.new.credits_sisu}
</body>
</html>
WOK
    @the_idx.keys.each { |lng| do_html(lng,a) }
  end
  # Appends html to the page for lng, and to the maintenance stream when
  # that is an open File.
  def do_html(lng,html)
    mnt=@output[lng][:html_mnt]
    mnt << html if mnt.is_a?(File)
    @output[lng][:html] << html
  end
  # Emits a heading for every letter from the current one up to the first
  # character of the author name in string[0]. The letter sequence restarts
  # whenever the language changes.
  # attrib:: accepted for interface compatibility; not used here.
  def do_string_name(lng,attrib,string)
    if @lng != lng
      @alph=@alphabet_list.dup
      @letter=@alph.shift
      @lng=lng
    end
    first=/^(\S)/.match(string[0])
    return unless first                                    # nothing to file under
    f=first[1]
    while @letter < f && !@alph.empty?
      @letter=@alph.shift
      # Same, well-formed markup for both streams (the maintenance variant
      # used to close </p> before </a>).
      do_html(lng,%{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>})
    end
  end
  # Writes, per language, each author (sorted by name) followed by that
  # author's works sorted by "date title".
  def html_body
    @the_idx.each_pair do |lng,lng_array|
      lng_array.sort.each do |author,details|
        do_string_name(lng,'',[author,details])
        name=author.sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_')
        do_html(lng,%{<p class="author"><a name="#{name}">#{author}</a></p>})
        lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert
        mnt=@output[lng][:html_mnt]
        mnt=nil unless mnt.is_a?(File)
        works=details[:md].map do |i|
          manifest_at=case @env.output_dir_structure.by?
          when :language then i[:file] + Sfx[:html]
          when :filetype then i[:file] + lang_code_insert + Sfx[:html]
          when :filename then './' + i[:file] + '/' + i[:page]
          else ''                                          # unknown structure
          end
          # [sort key, public html, maintenance html (only when needed)]
          work=[
            "#{i[:date]} #{i[:title]}",
            %{<p class="publication">#{i[:date]} <a href="#{manifest_at}">#{i[:title]}</a>, #{i[:author][:authors_s]}</p>}
          ]
          if mnt
            work << %{<p class="publication">[<a href="#{i[:file]}.sst">src</a>] #{i[:date]} <a href="file://#{manifest_at}">#{i[:title]}</a>, #{i[:author][:authors_s]} -- [<a href="#{i[:file]}.sst">#{i[:file]}.sst</a>]</p>}
          end
          work
        end
        works.sort_by {|y| y[0]}.each do |z|
          @output[lng][:html] << z[1]
          mnt << z[2] if mnt
        end
      end
    end
  end
  # Selects screen output; returns self so an action can be chained.
  def screen_print
    self
  end
  # Prints every author, followed by a tab-indented list of that author's
  # files, for each language in turn. (The index is keyed by language
  # first; treating a language entry as an author entry raised on nil.)
  def cycle
    @the_idx.each_value do |authors|
      authors.sort.each do |author,details|
        puts author
        details[:md].each do |x|
          puts "\t" + x[:file]
        end
      end
    end
  end
end
|
465
|
+
end
|
466
|
+
__END__
|