docbook_status 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,143 +1,64 @@
1
- # -*- encoding: utf-8 -*-
1
+ # -*- encoding:utf-8 -*-
2
+ module DocbookStatus
2
3
 
3
- require 'xml'
4
+ # :stopdoc:
5
+ LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
6
+ PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
7
+ # :startdoc:
4
8
 
5
- # Analyzes DocBook 5 documents for document structure (sections) and text length.
6
- #
7
- class DocbookStatus
8
-
9
- # :stopdoc
10
- #
11
- PATH = File.expand_path('../..', __FILE__) + File::SEPARATOR
12
- LIBPATH = File.expand_path('..', __FILE__) + File::SEPARATOR
13
- VERSION = File.read(PATH + '/version.txt').strip
14
- HOME = File.expand_path(ENV['HOME'] || ENV['USERPROFILE'])
15
- #
16
- # :startdoc
17
-
18
- # The DocBook 5 namespace URL
19
- DOCBOOK_NS = 'http://docbook.org/ns/docbook'
20
- # The XInclude namespace URL
21
- XINCLUDE_NS = 'http://www.w3.org/2001/XInclude'
22
-
23
- # Elements whose contents is counted as text
24
- @@text_elements = ['para','simpara','formalpara']
25
-
26
- # Section elements, following the list given in http://docbook.org/tdg5/en/html/ch02.html#roots
27
- # except for the refsect... elements.
28
- @@section_elements = %w[
29
- acknowledgements appendix article
30
- bibliography book
31
- chapter colophon
32
- dedication
33
- glossary
34
- index
35
- preface
36
- section sect1 sect2 sect3 sect4 set simplesect
37
- toc
38
- ]
39
-
40
- def initialize
41
- @sections = []
42
- end
43
-
44
- # Returns the version of docbook_status
9
+ # Returns the version string for the library.
45
10
  #
46
11
  def self.version
47
- VERSION
48
- end
49
-
50
- # Counts the words in the contents of the given node. _Word_ in this
51
- # context means something that is delimited by _space_ charactes and starts with
52
- # _word_ characters (in the regexp sense).
53
- #
54
- def count_words(node)
55
- words = node.content.strip.split(/[[:space:]]+/).find_all {|w| w =~ /\w+/}
56
- words.size
12
+ @version ||= File.read(path('version.txt')).strip
57
13
  end
58
14
 
59
- # Find the _title_ of the current section. That element is either
60
- # directly following or inside an _info_ element. Return the empty
61
- # string if no title can be found.
15
+ # Returns the library path for the module. If any arguments are given,
16
+ # they will be joined to the end of the library path using
17
+ # <tt>File.join</tt>.
62
18
  #
63
- def find_section_title(node)
64
- title = node.find_first('./db:title')
65
- if title.nil?
66
- title = node.find_first './db:info/db:title'
67
- end
68
- if title.nil?
69
- ""
70
- else
71
- title.content
19
+ def self.libpath( *args, &block )
20
+ rv = args.empty? ? LIBPATH : ::File.join(LIBPATH, args.flatten)
21
+ if block
22
+ begin
23
+ $LOAD_PATH.unshift LIBPATH
24
+ rv = block.call
25
+ ensure
26
+ $LOAD_PATH.shift
27
+ end
72
28
  end
29
+ return rv
73
30
  end
74
31
 
75
- # Check the document elements for content and type recursively,
76
- # starting at the current node. Returns an array with paragraph and
77
- # section maps.
32
+ # Returns the path for the module. If any arguments are given,
33
+ # they will be joined to the end of the path using
34
+ # <tt>File.join</tt>.
78
35
  #
79
- def check_node(node, level, ctr)
80
- if (@@text_elements.include? node.name)
81
- ctr << {:type => :para, :level => level, :words => count_words(node)}
82
- elsif (@@section_elements.include? node.name)
83
- title = find_section_title(node)
84
- ctr << {:type => :section, :level => level, :title => title, :name => node.name}
85
- end
86
- node.children.each {|inner_elem| check_node(inner_elem, level+1, ctr)} if node.children?
87
- ctr
88
- end
89
-
90
- # Check whether the document has a DocBook default namespace
91
- def is_docbook?(doc)
92
- dbns = doc.root.namespaces.default
93
- (!dbns.nil? && (dbns.href.casecmp(DOCBOOK_NS) == 0))
94
- end
95
-
96
- # Check whether the document has a XInclude namespace
97
- def has_xinclude?(doc)
98
- ret = false
99
- doc.root.namespaces.each do |ns|
100
- if (ns.href.casecmp(XINCLUDE_NS) == 0)
101
- ret = true
102
- break
36
+ def self.path( *args, &block )
37
+ rv = args.empty? ? PATH : ::File.join(PATH, args.flatten)
38
+ if block
39
+ begin
40
+ $LOAD_PATH.unshift PATH
41
+ rv = block.call
42
+ ensure
43
+ $LOAD_PATH.shift
103
44
  end
104
45
  end
105
- ret
46
+ return rv
106
47
  end
107
48
 
108
- # Searches the XML document for sections and word counts. Returns an
109
- # array of sections with their word counts.
49
+ # Utility method used to require all files ending in .rb that lie in the
50
+ # directory below this file that has the same name as the filename passed
51
+ # in. Optionally, a specific _directory_ name can be passed in such that
52
+ # the _filename_ does not have to be equivalent to the directory.
110
53
  #
111
- def analyze_document(doc)
112
- # Add a namespace declaration for XPath expressions
113
- doc.root.namespaces.default_prefix = 'db'
114
- # Analyze the document starting with the root node
115
- doc_maps = check_node(doc.root,0,[])
116
- @sections = []
117
- section_name = doc_maps[0][:title]
118
- section_type = doc_maps[0][:name]
119
- section_ctr = 0
120
- section_level = 0
121
- doc_ctr = 0
122
- #puts doc_maps.inspect
123
- xms = doc_maps.drop(1)
124
- # Compute word counts per section
125
- xms.each do |m|
126
- if (m[:type] == :para)
127
- doc_ctr += m[:words]
128
- section_ctr += m[:words]
129
- else
130
- @sections << [section_name,section_ctr,section_level,section_type]
131
- section_name = m[:title]
132
- section_ctr = 0
133
- section_level = m[:level]
134
- section_type = m[:name]
135
- end
136
- end
137
- @sections << [section_name,section_ctr,section_level,section_type]
138
- # Put the document word count near the document type
139
- @sections[0][1] = doc_ctr
140
- @sections
54
+ def self.require_all_libs_relative_to( fname, dir = nil )
55
+ dir ||= ::File.basename(fname, '.*')
56
+ search_me = ::File.expand_path(
57
+ ::File.join(::File.dirname(fname), dir, '**', '*.rb'))
58
+
59
+ Dir.glob(search_me).sort.each {|rb| require rb}
141
60
  end
142
61
 
143
- end
62
+ end # module DocbookStatus
63
+
64
+ DocbookStatus.require_all_libs_relative_to(__FILE__)
@@ -0,0 +1,108 @@
1
+ # -*- encoding:utf-8 -*-
2
+
3
+ require 'yaml'
4
+ module DocbookStatus
5
+
6
+ # Manages the history of writing progress in two modes. In session
7
+ # or demon mode the history shows progress for the user session. In
8
+ # normal mode the history is only maintained for calendar days,
9
+ # weeks, months.
10
+ #
11
+ # The writing progress can (but need not) be measured with these optional
12
+ # items:
13
+ # * start date (date of initialization)
14
+ # * scheduled end date
15
+ # * total word count goal
16
+ # * daily word count goal
17
+ #
18
+ # * file name
19
+ # * goal total
20
+ # * goal daily
21
+ # * start date
22
+ # * planned end date
23
+ # current entries
24
+ # * timestamp
25
+ # * word count
26
+ # archive entries
27
+ # * date
28
+ # * start
29
+ # * end
30
+ # * min
31
+ # * max
32
+ # * ctr (number of entries for the day)
33
+ #
34
+ class History
35
+
36
+ # History file, YAML format
37
+ HISTORY_FILE = 'dbs_work.yml'
38
+
39
+ # Does the history file exist?
40
+ def self.exists?()
41
+ File.exists?(HISTORY_FILE)
42
+ end
43
+
44
+ # Load the existing writing history
45
+ def initialize(name,end_planned=nil,goal_total=0,goal_daily=0)
46
+ if File.exists?(HISTORY_FILE)
47
+ @history = YAML.load_file(HISTORY_FILE)
48
+ else
49
+ @history = {:file => name,
50
+ :goal => {
51
+ :start => Date.today,
52
+ :end => end_planned,
53
+ :goal_total => goal_total,
54
+ :goal_daily => goal_daily},
55
+ :current => [],
56
+ :archive => {}}
57
+ end
58
+ end
59
+
60
+ def planned_end(date)
61
+ @history[:goal][:end]=date
62
+ end
63
+
64
+ def total_words(tw)
65
+ @history[:goal][:goal_total]=tw
66
+ end
67
+
68
+ def daily_words(tw)
69
+ @history[:goal][:goal_daily]=tw
70
+ end
71
+
72
+ # Add to the history
73
+ def add(ts,word_count)
74
+ # FIXME add demon mode
75
+ #@history[:current] << progress
76
+ #archive
77
+ k = ts.to_date
78
+ unless (@history[:archive][k].nil?)
79
+ @history[:archive][k][:min] = word_count if @history[:archive][k][:min] > word_count
80
+ @history[:archive][k][:max] = word_count if @history[:archive][k][:max] < word_count
81
+ @history[:archive][k][:end] = word_count
82
+ @history[:archive][k][:ctr] += 1
83
+ else
84
+ @history[:archive][k] = {:min => word_count, :max => word_count, :start => word_count, :end => word_count, :ctr => 1}
85
+ end
86
+ end
87
+
88
+ # Is there already a history?
89
+ def history?
90
+ @history[:archive].length != 0
91
+ end
92
+
93
+ # Convenience - returns the statistics for today
94
+ def today
95
+ @history[:archive][Date.today]
96
+ end
97
+
98
+ # Return the goals
99
+ def goals
100
+ @history[:goal]
101
+ end
102
+
103
+ # Save the writing history
104
+ def save
105
+ File.open(HISTORY_FILE, 'w') {|f| YAML.dump(@history,f)}
106
+ end
107
+ end
108
+ end
@@ -0,0 +1,258 @@
1
+ # -*- encoding:utf-8 -*-
2
+
3
+ require 'xml'
4
+ module DocbookStatus
5
+
6
+ # Analyzes DocBook 5 documents for document structure (sections) and text length.
7
+ #
8
+ class Status
9
+
10
+ # The DocBook 5 namespace URL
11
+ #
12
+ DOCBOOK_NS = 'http://docbook.org/ns/docbook'
13
+
14
+ # The XInclude namespace URL
15
+ #
16
+ XINCLUDE_NS = 'http://www.w3.org/2001/XInclude'
17
+
18
+ # Elements whose contents is counted as text. The _formalpara_
19
+ # elements are included implicitly because they contain _para_ child
20
+ # elements.
21
+ #
22
+ @@text_elements = ['para','simpara']
23
+
24
+ # Section elements, following the list given in http://docbook.org/tdg5/en/html/ch02.html#roots
25
+ # except for the refsect... elements.
26
+ #
27
+ @@section_elements = %w[
28
+ acknowledgements appendix article
29
+ bibliography book
30
+ chapter colophon
31
+ dedication
32
+ glossary
33
+ index
34
+ part preface
35
+ section sect1 sect2 sect3 sect4 set simplesect
36
+ toc
37
+ ]
38
+
39
+ attr_reader :doc
40
+
41
+ def initialize(fname=nil)
42
+ @sections = []
43
+ @remarks = []
44
+ @source = fname
45
+ @source_dir = fname.nil? ? nil : File.dirname(fname)
46
+ @source_file = fname.nil? ? nil : File.basename(fname)
47
+ @doc = nil
48
+ XML.default_line_numbers=true
49
+ end
50
+
51
+
52
+ # Return the remark-elements found in the document. If _keyword_ is
53
+ # nil then return all remarks, else only the ones with the right
54
+ # keyword.
55
+ #
56
+ def remarks(keyword=nil)
57
+ if keyword.nil?
58
+ @remarks
59
+ else
60
+ ukw = keyword.upcase
61
+ @remarks.find_all {|r| r[:keyword] == (ukw)}
62
+ end
63
+ end
64
+
65
+ # Counts the words in the contents of the given node. _Word_ in this
66
+ # context means something that is delimited by _space_ characters and starts with
67
+ # _word_ characters (in the regexp sense).
68
+ #
69
+ def count_words(node)
70
+ words = node.content.strip.split(/[[:space:]]+/).find_all {|w| w =~ /\w+/}
71
+ words.size
72
+ end
73
+
74
+ # Counts the words in the contents of the given node.
75
+ # It is assumed that the node is a kind of pure content (a paragraph) and therefore everything in it
76
+ # should be included in the word count. An exception to this are
77
+ # _remark_ elements, which are considered as comments, not meant for final publication.
78
+ #
79
+ def count_content_words(node)
80
+ ws = count_words(node)
81
+ # Count the remark text contained in the paragraph and subtract it from the real thing
82
+ wsr = node.find('db:remark').reduce(0) {|m,r| m+count_words(r)}
83
+ ws - wsr
84
+ end
85
+
86
+ # Find the _title_ of the current section. That element is either
87
+ # directly following or inside an _info_ element. Return the empty
88
+ # string if no title can be found.
89
+ #
90
+ def find_section_title(node)
91
+ title = node.find_first('./db:title')
92
+ if title.nil?
93
+ title = node.find_first './db:info/db:title'
94
+ end
95
+ if title.nil?
96
+ ""
97
+ else
98
+ title.content
99
+ end
100
+ end
101
+
102
+ # Check the document elements for content and type recursively,
103
+ # starting at the current node. Returns an array with paragraph and
104
+ # section maps.
105
+ #
106
+ def check_node(node, level, ctr)
107
+ if (@@text_elements.include? node.name)
108
+ ctr << {:type => :para, :level => level, :words => count_content_words(node)}
109
+ elsif (@@section_elements.include? node.name)
110
+ title = find_section_title(node)
111
+ ctr << {:type => :section, :level => level, :title => title, :name => node.name}
112
+ node.children.each {|inner_elem| check_node(inner_elem, level+1, ctr)} if node.children?
113
+ else
114
+ node.children.each {|inner_elem| check_node(inner_elem, level+1, ctr)} if node.children?
115
+ end
116
+
117
+ ctr
118
+ end
119
+
120
+ # Check whether the document has a DocBook default namespace
121
+ def is_docbook?(doc)
122
+ dbns = doc.root.namespaces.default
123
+ (!dbns.nil? && (dbns.href.casecmp(DOCBOOK_NS) == 0))
124
+ end
125
+
126
+ # Check whether the document has a XInclude namespace
127
+ def has_xinclude?(doc)
128
+ ret = false
129
+ doc.root.namespaces.each do |ns|
130
+ if (ns.href.casecmp(XINCLUDE_NS) == 0)
131
+ ret = true
132
+ break
133
+ end
134
+ end
135
+ ret
136
+ end
137
+
138
+ # Finds and returns all XInclude files/URLs in a document.
139
+ #
140
+ # OPTIMIZE implement xpointer and fallback handling for
141
+ # xi:include? see http://www.w3.org/TR/xinclude/
142
+ #
143
+ def find_xincludes(doc)
144
+ if has_xinclude?(doc)
145
+ xincs = doc.find('//xi:include', "xi:"+XINCLUDE_NS)
146
+ xfiles = xincs.map {|x| x.attributes['href'] }
147
+ (xfiles << xfiles.map {|xf|
148
+ xfn = File.exists?(xf) ? xf : File.expand_path(xf,File.dirname(doc.root.base_uri))
149
+ xdoc = XML::Document.file(xfn)
150
+ find_xincludes(xdoc)
151
+ }).flatten
152
+ else
153
+ []
154
+ end
155
+ end
156
+
157
+ # Find all remark elements in the document and return a map for
158
+ # every such element. The map contains:
159
+ #
160
+ # * keyword: if the first word of the content is uppercase that is the keyword, else _REMARK_
161
+ # * text: the content of the remark element, minus the keyword
162
+ # * file: the name of the source file
163
+ # * line: the line number in the source file
164
+ #
165
+ # OPTIMIZE look for 'role' attributes as keywords?
166
+ #
167
+ def find_remarks_in_doc(doc,source)
168
+ rems = doc.find('//db:remark')
169
+ rems.map {|rem|
170
+ c = rem.content.strip
171
+ kw = 'REMARK'
172
+ if rem.first.text?
173
+ kw1 = c.match('^([[:upper:]]+)([[:space:][:punct:]]|$)')
174
+ unless kw1.nil?
175
+ kw = kw1[1]
176
+ c = kw1.post_match.lstrip
177
+ end
178
+ end
179
+ # TODO integrate XPath? :path => rem.path, :parent => rem.parent.path,
180
+ {:keyword => kw, :text => c, :file=>source, :line => rem.line_num}
181
+ }
182
+ end
183
+
184
+ # Finds the remarks by looking through all the Xincluded files
185
+ #
186
+ def find_remarks(filter=[])
187
+ if (@source.nil?)
188
+ rfiles = find_xincludes(@doc)
189
+ else
190
+ @doc = XML::Document.file(@source)
191
+ rfiles = [@source_file] + find_xincludes(@doc)
192
+ end
193
+ @remarks = rfiles.map {|rf|
194
+ ind = XML::Document.file(File.expand_path(rf,@source.nil? ? '.' : @source_dir))
195
+ ind.root.namespaces.default_prefix = 'db'
196
+ rems = find_remarks_in_doc(ind, rf)
197
+ rems
198
+ }.flatten
199
+ if (filter.empty?)
200
+ @remarks
201
+ else
202
+ filter.map {|f|
203
+ @remarks.find_all {|r| f.casecmp(r[:keyword]) == 0}
204
+ }.flatten
205
+ end
206
+ end
207
+
208
+ # Searches the XML document for sections and word counts. Returns an
209
+ # array of sections (map) with title, word count, section level and DocBook tag.
210
+ #
211
+ def analyze_document(doc)
212
+ # Add a namespace declaration for XPath expressions
213
+ doc.root.namespaces.default_prefix = 'db'
214
+ # Analyze the document starting with the root node
215
+ doc_maps = check_node(doc.root,0,[])
216
+ @sections = []
217
+ section_name = doc_maps[0][:title]
218
+ section_type = doc_maps[0][:name]
219
+ section_ctr = 0
220
+ section_level = 0
221
+ doc_ctr = 0
222
+ #puts doc_maps.inspect
223
+ xms = doc_maps.drop(1)
224
+ # Compute word counts per section
225
+ xms.each do |m|
226
+ if (m[:type] == :para)
227
+ doc_ctr += m[:words]
228
+ section_ctr += m[:words]
229
+ else
230
+ @sections << {:title => section_name, :words => section_ctr, :level => section_level, :tag => section_type}
231
+ section_name = m[:title]
232
+ section_ctr = 0
233
+ section_level = m[:level]
234
+ section_type = m[:name]
235
+ end
236
+ end
237
+ @sections << {:title => section_name, :words => section_ctr, :level => section_level, :tag => section_type}
238
+ # Put the document word count near the document type
239
+ @sections[0][:words] = doc_ctr
240
+ @sections
241
+ end
242
+
243
+ # Open the XML document, check for the DocBook5 namespace and finally
244
+ # apply XInclude treatment to it, if it has an XInclude namespace.
245
+ # Returns a map with the file name, the file's modification time, and the section structure.
246
+ #
247
+ def analyze_file
248
+ full_name = File.expand_path(@source)
249
+ changed = File.ctime(@source)
250
+ @doc = XML::Document.file(@source)
251
+ raise ArgumentError, "Error: #{@source} is apparently not DocBook 5." unless is_docbook?(@doc)
252
+ @doc.xinclude if has_xinclude?(@doc)
253
+ sections = analyze_document(@doc)
254
+ {:file => full_name, :modified => changed, :sections => sections}
255
+ end
256
+
257
+ end
258
+ end