passagemath-sws2rst 10.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,198 @@
1
+ #!python
2
+ # -*- coding: utf-8 -*-
3
+ r"""
4
+ sage-sws2rst
5
+ ============
6
+
7
+ Translate a Sage worksheet file (.sws) into an rst file. The result is
8
+ saved in the current working directory.
9
+
10
+ Usage::
11
+
12
+ sage --sws2rst [-h] <source sws file>
13
+
14
+ Print the help message::
15
+
16
+ sage --sws2rst -h
17
+
18
+ EXAMPLES::
19
+
20
+ sage --sws2rst file.sws
21
+
22
+ AUTHORS:
23
+
24
+ - Pablo Angulo (January 2011): Initial version
25
+ - Karl-Dieter Crisman (June 2012): Documentation
26
+ and minor refinements
27
+ - Karl-Dieter Crisman (November 2014): Correct use of temporary files,
28
+ see :trac:`17308`.
29
+ """
30
+
31
+ #*****************************************************************************
32
+ # Copyright (C) 2011 Pablo Angulo
33
+ # Copyright (C) 2012-2014 Karl-Dieter Crisman
34
+ #
35
+ # This program is free software: you can redistribute it and/or modify
36
+ # it under the terms of the GNU General Public License as published by
37
+ # the Free Software Foundation, either version 2 of the License, or
38
+ # (at your option) any later version.
39
+ # https://www.gnu.org/licenses/
40
+ #*****************************************************************************
41
+
42
+ import sys
43
+ import tarfile
44
+ import os
45
+ import shutil
46
+ import codecs
47
+ import tempfile
48
+ from sage_sws2rst.worksheet2rst import worksheet2rst
49
+
50
+ from optparse import OptionParser
51
+
52
+
53
def process_sws(sws_file):
    """
    Process the ``.sws`` file ``sws_file`` and create an ``.rst`` file
    (and possibly media files) in the current working directory.

    INPUT:

    - ``sws_file`` -- string -- path of the worksheet archive; it is
      opened as a bzip2-compressed tar file

    The archive is unpacked into a temporary directory which is removed
    again before returning, whether or not the conversion succeeded.

    Raises ``RuntimeError`` if the archive does not contain a
    ``sage_worksheet`` directory.
    """
    base_name = os.path.basename(os.path.splitext(sws_file)[0])
    base_name = base_name.replace(' ','_')

    tempdir = tempfile.mkdtemp()
    try:
        # Bind the archive to its own name: reusing ``sws_file`` here would
        # make the error message below show a TarFile object, not the path.
        with tarfile.open(sws_file, mode='r:bz2') as archive:
            if hasattr(tarfile, 'data_filter'):
                # Worksheets come from arbitrary sources; where the running
                # Python supports extraction filters, refuse members that
                # would escape ``tempdir`` (absolute paths, "..", links).
                archive.extractall(tempdir, filter='data')
            else:
                archive.extractall(tempdir)
        worksheet_dir = os.path.join(tempdir, 'sage_worksheet')
        if not os.path.isdir(worksheet_dir):
            raise RuntimeError("Worksheet file %r does not contain a 'sage_worksheet' directory" % sws_file)
        process_worksheet(worksheet_dir, base_name)
    finally:
        shutil.rmtree(tempdir)
71
+
72
def process_worksheet(worksheet_dir, base_name):
    """
    Process the extracted worksheet directory ``worksheet_dir`` and
    create the ``.rst`` and media files with base name ``base_name``.

    Files are moved from ``worksheet_dir``, so make sure these are
    temporary files!

    INPUT:

    - ``worksheet_dir`` -- string -- directory holding ``worksheet.html``
      and, optionally, ``data`` and ``cells`` subdirectories

    - ``base_name`` -- string -- the output is written to
      ``<base_name>.rst`` and ``<base_name>_media`` relative to the
      current working directory
    """
    # Images
    images_dir = base_name + '_media'
    try:
        os.mkdir(images_dir)
    except OSError:
        # An already existing directory is fine; anything else is not.
        if not os.path.isdir(images_dir):
            raise

    # "data" dir: attached files, stored with spaces turned into underscores
    data_path = os.path.join(worksheet_dir, 'data')
    if os.path.isdir(data_path):
        for image in os.listdir(data_path):
            shutil.move(os.path.join(data_path, image),
                        os.path.join(images_dir, image.replace(' ','_')))

    # cells: files produced by a cell are stored as cell_<cell>_<file>
    cells_path = os.path.join(worksheet_dir, 'cells')
    if os.path.isdir(cells_path):
        for cell in os.listdir(cells_path):
            cell_path = os.path.join(cells_path, cell)
            if not os.path.isdir(cell_path):
                # A stray regular file next to the cell directories would
                # make os.listdir() fail; there is nothing to collect from it.
                continue
            for image in os.listdir(cell_path):
                if os.path.isfile(os.path.join(cell_path, image)):
                    shutil.move(os.path.join(cell_path, image),
                                os.path.join(images_dir, 'cell_%s_%s'%(cell,image)))
                # Subdirectories (for instance a Jmol image directory) are
                # currently not copied.

    # read html file, parse it, write rst file
    html_file = os.path.join(worksheet_dir, 'worksheet.html')
    with codecs.open(html_file, mode='r', encoding='utf-8') as f:
        html_text = f.read()

    rst_text = worksheet2rst(html_text, images_dir=images_dir)
    rst_file = base_name + '.rst'

    with codecs.open(rst_file, mode='w', encoding='utf-8') as out_file:
        out_file.write(rst_text)

    print("File at", rst_file)
    print("Image directory at", images_dir)
123
+
124
+
125
+ # Set the parser
126
+ usage = r"""
127
+
128
+ sage --sws2rst [options] <source sws file> ...
129
+
130
+ Translate a Sage worksheet file (.sws) into an reStructuredText
131
+ (.rst) file. At least one sws file argument is required; all sws
132
+ files will be parsed and translated. Spaces in the names of the
133
+ worksheet will be converted to underscores. The resulting files will
134
+ be stored in the current working directory.
135
+
136
+ Examples:
137
+
138
+ sage --sws2rst file.sws
139
+ sage --sws2rst file1.sws file2.sws file3.sws
140
+ sage --sws2rst -h # this help message prints
141
+ sage --sws2rst --sphinxify # information about how to use
142
+ # Sphinx to compile your rst file
143
+ """
144
+
145
+ sphinxify_text = r"""
146
+
147
+ Once you have made your rst file, what can you do with it?
148
+
149
+ If this is a file which is likely to become part of the Sage
150
+ standard documentation, you will want to edit the appropriate
151
+ file in $SAGE_ROOT/src/doc to include your file, or
152
+ simply include your file as appropriate.
153
+
154
+ However, you may simply want to make great-looking documentation
155
+ for some other purpose out of your worksheet. The following
156
+ steps are one way to do so.
157
+
158
+ - Assume that the generated .rst file is ``My_Project.rst``.
159
+ - Make a folder somewhere convenient to compile in, say, ``MyProject``.
160
+ - Then move your .rst file into that folder, and cd into it.
161
+ - Now the key is to use Sage's shell to run Sphinx on it! Run ``sage --sh``.
162
+ - Then type ``sphinx-quickstart`` and follow the instructions in the
163
+ Sphinx tutorial [1]_. You will probably want to choose to render math
164
+ with MathJax [2]_, but you can accept the defaults for the other options.
165
+ - Finally, edit ``index.rst`` by adding ``My_Project`` in the table of
166
+ contents, as detailed in the Sphinx tutorial [3]_.
167
+ - If you now type ``make html`` you should get a beautiful-looking web page
168
+ in ``_build/html``. If you did not have a header at the top of your worksheet,
169
+ you may get an error, but you can ignore this.
170
+
171
+ REFERENCES:
172
+
173
+ .. [1] Getting Started,
174
+ https://www.sphinx-doc.org/en/master/usage/quickstart.html
175
+ .. [2] MathJax,
176
+ http://www.mathjax.org/
177
+ .. [3] Defining Document Structure, Getting Started,
178
+ https://www.sphinx-doc.org/en/master/usage/quickstart.html#defining-document-structure"""
179
+
180
parser = OptionParser(usage=usage)
parser.add_option(
    "--sphinxify",
    dest="sphinxify",
    action="store_true",
    help="Print information about how to use Sphinx to compile your rst file, then exit.",
)
options, args = parser.parse_args()

# --sphinxify only prints the how-to text; no worksheet is converted.
if options.sphinxify:
    print(sphinxify_text)
    sys.exit(0)

# At least one worksheet file is required.
if not args:
    parser.print_usage()
    sys.exit(1)

# Convert every worksheet named on the command line, in order.
for file_name in args:
    print("Processing", file_name)
    process_sws(file_name)
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.1
2
+ Name: passagemath-sws2rst
3
+ Version: 10.4.1
4
+ Summary: passagemath: SageNB worksheet converter
5
+ Author-email: The Sage Developers <sage-support@googlegroups.com>
6
+ License: GNU General Public License (GPL) v3 or later
7
+ Project-URL: Homepage, https://www.sagemath.org
8
+ Description-Content-Type: text/x-rst
9
+
10
+ sage_sws2rst: Translate legacy Sage worksheet files (.sws) to reStructuredText (.rst) files
11
+ ===========================================================================================
12
+
13
+ Description
14
+ -----------
15
+
16
+ Provides a script `sage-sws2rst`, which translates a Sage worksheet file (.sws) into a reStructuredText (.rst) file.
17
+
18
+ Sage worksheet files (.sws) are a file format that was used by the now-obsolete Sage notebook (https://github.com/sagemath/sagenb), superseded by the Jupyter notebook. SageNB was dropped in the course of the transition of SageMath to Python 3.
19
+
20
+ This package was extracted from the SageNB sources in https://github.com/sagemath/sage/issues/28838 to provide a way to convert pedagogical material available in Sage worksheet format.
@@ -0,0 +1,9 @@
1
+ passagemath_sws2rst-10.4.1.data/scripts/sage-sws2rst,sha256=Cs5Wm2qtiZ9cGiOYXrvxttYlV4vzQD8DSzHCUSe5d2U,6869
2
+ sage_sws2rst/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ sage_sws2rst/comments2rst.py,sha256=C0HUDmY8OOhisZSk5d6WPstfY8Uw-nBM6X3gFSw-M1o,15609
4
+ sage_sws2rst/results2rst.py,sha256=Mu06rryTWeQ7C9QeEtcN88teUkmDfXR0iRJcpYljkgI,6046
5
+ sage_sws2rst/worksheet2rst.py,sha256=smRjUEttzUXWVIephNcjq54yIH0cfcCkJEmPJNgtGjM,5987
6
+ passagemath_sws2rst-10.4.1.dist-info/METADATA,sha256=LVdNYT3faDQlz_QhXYwDDaspWRYU07aXow_f8w0Res0,1086
7
+ passagemath_sws2rst-10.4.1.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
8
+ passagemath_sws2rst-10.4.1.dist-info/top_level.txt,sha256=NXbIX8bi906EaXF0_SYm-oymqgGkZRQ_laYlHnr-2rc,13
9
+ passagemath_sws2rst-10.4.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (73.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ sage_sws2rst
File without changes
@@ -0,0 +1,480 @@
1
+ # -*- coding: utf-8 -*-
2
+ r"""
3
+ Convert html from text cells in the notebook into ReStructuredText
4
+
5
+ This is called by sws2rst
6
+
7
+ - Pablo Angulo Ardoy (2011-02-25): initial version
8
+ """
9
+ #**************************************************
10
+ # Copyright (C) 2011 Pablo Angulo
11
+ #
12
+ # Distributed under the terms of the GPL License
13
+ #**************************************************
14
+
15
+ import re
16
+ import os
17
+ try:
18
+ from bs4 import (BeautifulSoup, Tag,
19
+ CData, Comment, Declaration, ProcessingInstruction)
20
+ except ImportError:
21
+ raise ImportError("""BeautifulSoup must be installed.
22
+
23
+ Please either install using
24
+
25
+ sage -pip install beautifulsoup4
26
+ """)
27
+
28
# A "$$" that is not preceded by a backslash (negative lookbehind, see
# http://www.regular-expressions.info/lookaround.html)
double_dollar = re.compile(r'(?<!\\)\$\$')


def preprocess_display_latex(text):
    r"""replace $$some display latex$$ with <display>some display latex</display>
    before the soup is built.

    Deals with the situation where <p></p> tags are mixed
    with $$, like $$<p>display_latex$$</p>, unless the mess is huge

    INPUT:

    - ``text`` -- string -- a chunk of HTML text

    OUTPUT:

    - string -- the same text with every unescaped ``$$`` pair turned
      into a ``<display>`` element

    Raises ``ValueError`` if the text between a ``$$`` pair opens and
    closes ``<p>`` tags with an imbalance of more than one.

    EXAMPLES::

        >>> from sage_sws2rst.comments2rst import preprocess_display_latex
        >>> s="$$a=2$$"
        >>> preprocess_display_latex(s)
        '<display>a=2</display>'
        >>> s="<p>$$a=2$$</p>"
        >>> preprocess_display_latex(s)
        '<p><display>a=2</display></p>'
        >>> s="<p>$$a=2</p>$$"
        >>> preprocess_display_latex(s)
        '<p><display>a=2</display></p>'
        >>> s="$$<p>a=2</p>$$"
        >>> preprocess_display_latex(s)
        '<display>a=2</display>'
    """
    parts = double_dollar.split(text)
    pieces = []
    # Chunks at even positions are outside of display math, chunks at odd
    # positions are the math itself.  The last chunk is whatever follows
    # the final "$$" and is copied verbatim after the loop.
    for index, chunk in enumerate(parts[:-1]):
        if index % 2 == 0:
            pieces.append(chunk)
            pieces.append('<display>')
            continue
        math, balance = prune_tags(chunk)
        if abs(balance) > 1:
            raise ValueError('display latex was messed up with html code')
        pieces.append(math)
        pieces.append('</display>')
        # Re-emit the single <p> or </p> that was stripped from the math,
        # so that the paragraph structure around it stays balanced.
        if balance == 1:
            pieces.append('<p>')
        elif balance == -1:
            pieces.append('</p>')
    pieces.append(parts[-1])
    return ''.join(pieces)


def prune_tags(text):
    """Strip ``<br/>``, ``<br />``, ``<p>`` and ``</p>`` from ``text``.

    OUTPUT:

    - a pair ``(stripped_text, count)`` where ``count`` is the number of
      ``<p>`` minus the number of ``</p>`` found in ``text``
    """
    count = text.count('<p>') - text.count('</p>')
    for tag in ('<br/>', '<br />', '<p>', '</p>'):
        text = text.replace(tag, '')
    return text, count
80
+
81
# Characters with a special meaning in rst, mapped to their escaped form
escapable_chars = {'+': r'\+',
                   '*': r'\*',
                   '|': r'\|',
                   '-': r'\-'}

# Translation table built once from ``escapable_chars``
_ESCAPE_TABLE = str.maketrans(escapable_chars)


def escape_chars(text):
    """Return ``text`` with every character of ``escapable_chars``
    replaced by its backslash-escaped form.

    All characters are handled in a single pass, so the output of one
    replacement is never escaped again by another one.
    """
    return text.translate(_ESCAPE_TABLE)
91
+
92
# This is supposed to be handled by BeautifulSoup, but doesn't work
xml_entities = {'&lt;':'<',
                '&gt;':'>',
                '&amp;':'&',
                '&quot;':'"',
                '&apos;':"'",
                }

# Matches any one of the entities above
_XML_ENTITY_RE = re.compile('|'.join(re.escape(entity) for entity in xml_entities))


def replace_xml_entities(text):
    """Return ``text`` with the five predefined XML entities replaced
    by the characters they stand for.

    Each entity is decoded exactly once.  Replacing the entities one
    after the other would decode ``&amp;quot;`` first into ``&quot;``
    and then into ``"``, which is one level too many.
    """
    return _XML_ENTITY_RE.sub(lambda match: xml_entities[match.group(0)], text)
105
+
106
+
107
def replace_courier(soup):
    """Lacking a better option, I use courier font to mark <code>
    within tinyMCE. And I want to turn that into real code tags.

    Most users won't be needing this(?), so this code is not called anywhere
    but kept for reference

    INPUT:

    - ``soup`` -- the BeautifulSoup document; it is modified in place:
      every tag whose ``style`` attribute mentions ``courier`` is
      replaced by a ``<code>`` tag holding the same children
    """
    def uses_courier(tag):
        # The style lives in the tag's attributes; ``'style' in tag``
        # would look at the tag's children instead.
        return 'courier' in (tag.get('style') or '')

    for styled in soup.find_all(uses_courier):
        code = soup.new_tag('code')
        # Appending a node to ``code`` removes it from ``styled``, so this
        # loop ends once every child has been moved over.
        while styled.contents:
            code.append(styled.contents[0])
        styled.replace_with(code)
119
+
120
# A "$" that is not preceded by a backslash (negative lookbehind, see
# http://www.regular-expressions.info/lookaround.html)
single_dollar = re.compile(r'(?<!\\)\$')
def replace_latex(soup):
    r"""Replaces inline latex by :math:`code` and escapes
    some rst special chars like +, -, * and | outside of inline latex

    does not escape chars inside display or pre tags

    INPUT:

    - ``soup`` -- the BeautifulSoup document; its text nodes are
      replaced in place, nothing is returned

    EXAMPLES::

        >>> from sage_sws2rst.comments2rst import replace_latex
        >>> from bs4 import BeautifulSoup
        >>> soup = r"<p>Some <strong>latex: $e^\pi i=-1$</strong></p>"
        >>> s = BeautifulSoup(soup, features='html.parser')
        >>> replace_latex(s)
        >>> s
        <p>Some <strong>latex: :math:`e^\pi i=-1`</strong></p>

    ::

        >>> soup = "<p><strong>2+2 | 1+3</strong></p>"
        >>> s = BeautifulSoup(soup, features='html.parser')
        >>> replace_latex(s)
        >>> s
        <p><strong>2\+2 \| 1\+3</strong></p>
    """
    for t in soup.find_all(string=re.compile('.+')):
        # Text inside display math or preformatted blocks is left alone.
        if t.find_parents(name='display') or t.find_parents(name='pre'):
            continue
        pieces = []
        # Splitting at "$" puts the text outside of latex at even positions
        # and the latex code at odd positions.
        for index, part in enumerate(single_dollar.split(t)):
            if index % 2:
                pieces.append(' :math:`%s`' % part)
            else:
                pieces.append(escape_chars(part))
        t.replace_with(''.join(pieces))
155
+
156
+
157
class Soup2Rst(object):
    """builds the rst text from the Soup Tree

    ``visit`` looks the name of a node up in ``tags`` and hands the node
    to the matching ``visit_<kind>`` method.  Nodes whose name is not
    listed are rendered as their plain string with newlines removed.
    """
    # tag name -> suffix of the ``visit_*`` method that renders it
    tags = {'h1':'header',
            'h2':'header',
            'h3':'header',
            'h4':'header',
            'h5':'header',
            'h6':'header',
            'p': 'p',
            '[document]': 'document',
            'address': 'em',
            'br': 'br',
            'b':'strong',
            'strong':'strong',
            'em':'em',
            'pre':'pre',
            'code':'code',
            'display':'display',
            'span':'inline_no_tag',
            'ul':'ul',
            'ol':'ol',
            'li':'li',
            'a':'a',
            'table':'table',
            # 'tr' has no entry: rows are rendered by visit_table
            'td':'inline_no_tag',
            'th':'inline_no_tag',
            'tt':'inline_no_tag',
            'div':'block_no_tag',
            'img':'img',
            }

    # header tag -> character used to underline the title
    headers = {'h1':'=',
               'h2':'-',
               'h3':'^',
               'h4':'"',
               'h5':'~',
               'h6':'*',
               }

    def __init__(self, images_dir):
        self.images_dir = images_dir
        # nesting depth of the list being rendered, -1 outside of any list
        self._nested_list = -1
        # one entry per open list: True for <ol>, False for <ul>
        self._inside_ol_or_ul = []
        self._inside_code_tag = False

    def visit(self, node):
        """Return the rst text for ``node``."""
        if isinstance(node, (CData, Comment, Declaration, ProcessingInstruction)):
            return ''
        elif hasattr(node, 'name') and node.name in self.tags:
            visitor = getattr(self, 'visit_' + self.tags[node.name])
            return visitor(node)
        else:
            # Assume plain string
            return str(node).replace('\n','')

    def visit_document(self, node):
        return '\n'.join(self.visit(tag) for tag in node.contents)

    def get_plain_text(self, node):
        """Gets all text, removing all tags"""
        if hasattr(node, 'contents'):
            t = ' '.join(self.get_plain_text(tag) for tag in node.contents)
        else:
            t = str(node)
        return t.replace('\n','')

    def visit_header(self, node):
        s = ''.join(self.visit(tag) for tag in node.contents)
        spacer = self.headers[node.name]*len(s)
        return s.replace( '\n', '') + '\n' + spacer

    def visit_pre(self, node):
        # decode_contents() is the markup between <pre ...> and </pre>;
        # slicing str(node) by a fixed width breaks when the tag has attributes.
        inner = node.decode_contents()
        inner = inner.replace('<br />','\n').replace('<br></br>','\n')
        return '::\n\n ' + inner.replace('\n','\n ')

    def _visit_list(self, node, ordered):
        """Render the items of a list; ``ordered`` is True for <ol>."""
        self._nested_list += 1
        self._inside_ol_or_ul.append(ordered)
        try:
            return '\n\n'+''.join(self.visit(tag) for tag in node.contents)+'\n'
        finally:
            # Restore the list state even if rendering an item failed.
            self._inside_ol_or_ul.pop()
            self._nested_list -= 1

    def visit_ul(self, node):
        return self._visit_list(node, False)

    def visit_ol(self, node):
        return self._visit_list(node, True)

    def visit_li(self, node):
        # An <li> outside of any list is rendered as a bullet item.
        ordered = self._inside_ol_or_ul[-1] if self._inside_ol_or_ul else False
        return (' '*self._nested_list
                + ('#. ' if ordered else '- ')
                +' '.join(self.visit(tag) for tag in node.contents)
                + '\n')

    def visit_display(self, node):
        inner = node.decode_contents()
        return ('\n\n.. MATH::\n\n ' +
                inner.replace('<br></br>','\n').replace('\n','\n ') +
                '\n\n.. end of math\n\n')

    def visit_img(self, node):
        src = node.get('src')
        if not src:
            # Nothing to point the image directive at.
            return ''
        return '.. image:: ' + os.path.join(self.images_dir, src.replace(' ','_')) + '\n :align: center\n'

    def visit_table(self,node):
        """Render a table as an rst simple table."""
        rows = []
        for elt in node.contents:
            name = getattr(elt, 'name', None)
            if name in ('thead', 'tbody', 'tfoot'):
                rows.extend(self.prepare_tr(row)
                            for row in elt
                            if getattr(row, 'name', None) == 'tr')
                if name == 'thead':
                    rows.append([]) #this row represents a separator
            elif name == 'tr':
                rows.append(self.prepare_tr(elt))

        if not rows:
            # A table without any row has no columns to lay out.
            return ''

        # Pad every row to the width of the longest one.
        ncols = max(len(row) for row in rows)
        for row in rows:
            if len(row) < ncols:
                row.extend( ['']*(ncols - len(row)))
        cols_sizes = [max(len(td) for td in tds_in_col)
                      for tds_in_col in zip(*rows)]
        border = ' '.join('='*c for c in cols_sizes)
        result = [border]

        for row in rows:
            if any(td for td in row):
                result.append(' '.join(td+' '*(l - len(td))
                                       for l,td in zip(cols_sizes,row)))
            else:
                # A row with only empty cells is drawn as a separator line.
                result.append(' '.join('-'*c for c in cols_sizes))
        result.append(border)
        return '\n'.join(result)

    def prepare_tr(self, node):
        return [self.visit(tag) for tag in node.contents if tag!='\n']

    def visit_br(self, node):
        return '\n\n'

    def _visit_emphasis(self, node, marker):
        """Wrap the content of ``node`` in ``marker`` (``*`` or ``**``)."""
        if not node.contents:
            return ''
        content = ' '.join(self.visit(tag) for tag in node.contents).strip()
        if not content:
            return ''
        if '``' in content:
            # Content that already holds inline code is returned unwrapped.
            return content
        return ' ' + marker + content + marker + ' '

    def visit_strong(self, node):
        return self._visit_emphasis(node, '**')

    def visit_em(self,node):
        return self._visit_emphasis(node, '*')

    def visit_code(self, node):
        if node.contents:
            content = self.get_plain_text(node).strip()
            return '``' + content + '``'
        else:
            return ''

    def visit_inline_no_tag(self, node):
        return (' '.join(self.visit(tag)
                         for tag in node.contents)).strip()

    def visit_block_no_tag(self, node):
        return '\n'.join(self.visit(tag) for tag in node.contents) + '\n'

    def visit_p(self, node):
        return ''.join(self.visit(tag) for tag in node.contents) + '\n\n'

    def visit_a(self, node):
        c = ' '.join(self.visit(tag) for tag in node.contents)
        link = node.get('href')
        if link is None:
            # No href: the anchor is a link target, named by "name" (or "id").
            target = node.get('name') or node.get('id')
            if not target:
                return c
            return '.. _%s:\n\n' % target
        if not link:
            # An empty href points nowhere; keep only the text.
            return c
        if link.startswith('#'):
            return ':ref:`%s <%s>`'%(c, link[1:])
        return '`%s <%s>`_'%(c, link)
358
+
359
+
360
# Two or more consecutive newlines
_BLANK_LINE_RUN_RE = re.compile(r'\n\n+')


def html2rst(text, images_dir):
    r"""
    Convert html, typically generated by tinyMCE, into rst
    compatible with Sage documentation.

    The main job is done by BeautifulSoup, which is much more
    robust than conventional parsers like HTMLParser, but also
    several details specific of this context are taken into
    account, so this code differs from generic approaches like
    those found on the web.

    INPUT:

    - ``text`` -- string -- a chunk of HTML text

    - ``images_dir`` -- string -- folder where images are stored

    OUTPUT:

    - string -- rst text

    EXAMPLES::

        >>> from sage_sws2rst.comments2rst import html2rst
        >>> text = r'<p>Some text with <em>math</em>: $e^{\pi i}=-1$</p>'
        >>> html2rst(text, '')
        'Some text with *math* : :math:`e^{\\pi i}=-1`\n\n'

    ::

        >>> text = '<p>Text with <em>incorrect</p> nesting</em>.'
        >>> html2rst(text, '')
        'Text with *incorrect* \n\n nesting\n.'

    ::

        >>> text = '<pre>Preformatted: \n a+2\n</pre><p> Not preformatted: \n a+2\n</p>'
        >>> html2rst(text, '')
        '::\n\n Preformatted: \n a+2\n \n Not preformatted: a\\+2\n\n'

    ::

        >>> text = '&aacute;ñ&nbsp;&ntildeá'
        >>> html2rst(text, '')
        '\xe1\xf1 \xf1\xe1'

    ::

        >>> text = r'<p>some text</p><p>$$</p><p>3.183098861 \cdot 10^{-1}</p><p>$$</p>'
        >>> html2rst(text, '')
        'some text\n\n.. MATH::\n\n 3.183098861 \\cdot 10^{-1}\n\n.. end of math\n\n'

    When the content is empty::

        >>> html2rst("<strong></strong> ", '')
        '\n '
        >>> html2rst("<strong> </strong> ", '')
        '\n '
        >>> html2rst("<em></em> ", '')
        '\n '
        >>> html2rst("<em> </em> ", '')
        '\n '

    Spaces are added around *italic* or **bold** text (otherwise, it
    may be invalid ReStructuredText syntax)::

        >>> text = '<p><strong>Exercice.</strong>Let x be ...</p>'
        >>> html2rst(text, '')
        ' **Exercice.** Let x be ...\n\n'
        >>> text = '<p><em>Exercice.</em>Let x be ...</p>'
        >>> html2rst(text, '')
        ' *Exercice.* Let x be ...\n\n'

    Below is an example showing the translation from html to rst is not
    always perfect.

    Here the strong emphasis is on more than one line and is not properly
    translated::

        >>> text='<p>You will find a <em>while loop</em> helpful here. Below is a simple example:</p><p style="padding-left: 30px;"><strong>x = 0<br />while x &lt; 7:<br />&nbsp;&nbsp;&nbsp; x = x + 2<br />&nbsp;&nbsp;&nbsp; print x</strong></p>'
        >>> html2rst(text, '')
        'You will find a *while loop* helpful here. Below is a simple
        example:\n\n **x = 0 \n\n while x < 7: \n\n x = x \\+ 2 \n\n
        print x** \n\n'

    """
    # replace $$some display latex$$ with
    # <display>some display latex</display>
    text = preprocess_display_latex(text)

    # eliminate nasty &nbsp;
    text = text.replace('&nbsp;',' ')

    # The html was not written by a human (tinyMCE), so parse it leniently.
    soup = BeautifulSoup(text, features='html.parser')

    # remove all comments
    for comment in soup.find_all(string=lambda s: isinstance(s, Comment)):
        comment.extract()

    replace_latex(soup)
    rst = Soup2Rst(images_dir).visit(soup)

    # Collapse every run of two or more newlines into exactly two.
    rst = _BLANK_LINE_RUN_RE.sub('\n\n', rst)
    return replace_xml_entities(rst)
477
+
478
+ if __name__ == "__main__":
479
+ import doctest
480
+ doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
@@ -0,0 +1,171 @@
1
+ # -*- coding: utf-8 -*-
2
+ r"""
3
+ Convert output from code cells in the notebook into ReStructuredText
4
+
5
+ This is called by sws2rst
6
+
7
+ - Pablo Angulo Ardoy (2011-02-25): initial version
8
+ """
9
+ #**************************************************
10
+ # Copyright (C) 2011 Pablo Angulo
11
+ #
12
+ # Distributed under the terms of the GPL License
13
+ #**************************************************
14
+
15
+
16
+ import re
17
+ IMAGES_DIR = 'images/'
18
+
19
+ #We parse lines one by one but keep track of current scope
20
+ #similarly to worksheet2rst.py
21
+ #Results are split into different types. Some are discarded
22
class States(object):
    """Scopes the results parser can be in while reading lines."""
    NORMAL = 0
    HTML = 1
    MATH = 2
    TRACEBACK = 3

class LineTypes(object):
    """Kinds of output lines; the kind decides where a line is collected."""
    PLAIN = 0
    IMAGE = 1
    LATEX = 2
    HTML = 3
    TRACE = 4

class ResultsParser(object):
    """Auxiliary class for results2rst

    ``transitions`` maps each state to a list of
    ``(regex, replacement, line_type, new_state)`` tuples.
    """
    def __init__(self, images_dir):
        # ``images_dir`` ends up inside a regex *replacement template*, where
        # a backslash starts an escape.  Double the backslashes so that a path
        # such as 'media\\cell_1_' is inserted literally.
        images_prefix = images_dir.replace('\\', '\\\\')
        ##Order matters, place more restrictive regex's before more general ones
        ##If no regex matches, line will be discarded
        ##a self transition is needed to produce any output
        self.transitions = {
            States.NORMAL:[
                #IMAGE
                (re.compile(r"^\<html\>\<font color='black'\>"
                            r"\<img src='cell\://(.*?)'\>"
                            r"\</font\>\</html\>"),
                 "\n.. image:: " + images_prefix + "\\1\n :align: center\n",
                 LineTypes.IMAGE,
                 States.NORMAL),
                #SELF-CONTAINED MATH
                (re.compile(r"^\<html\>\<div class=\"math\"\>"
                            r"\\newcommand\{\\Bold\}\[1\]\{\\mathbf\{\#1\}\}"
                            r"(.*?)\</div\>\</html\>$"),
                 "\n.. MATH::\n\n \\1\n",
                 LineTypes.LATEX,
                 States.NORMAL),
                #SELF-CONTAINED MATH - BIS
                (re.compile(r"^\<html\>\<div class=\"math\"\>"
                            r"(.*?)\</div\>\</html\>$"),
                 "\n.. MATH::\n\n \\1",
                 LineTypes.LATEX,
                 States.NORMAL),
                #START Traceback
                (re.compile(r"^(Traceback.*)"),
                 " Traceback (most recent call last):",
                 LineTypes.TRACE,
                 States.TRACEBACK),
                #START MATH
                (re.compile(r"^\<html\>\<div class=\"math\"\>"
                            r"\\newcommand\{\\Bold\}\[1\]\{\\mathbf\{\#1\}\}(.*?)"),
                 "\n.. MATH::\n\n \\1",
                 LineTypes.LATEX,
                 States.MATH),
                #SELF-CONTAINED HTML
                (re.compile(r"^\<html\>.*</html\>$"),
                 " <html>...</html>",
                 LineTypes.HTML,
                 States.NORMAL),
                #START HTML
                (re.compile(r"^\<html\>.*"),
                 " <html>...</html>",
                 LineTypes.HTML,
                 States.HTML),
                #CONTINUE NORMAL
                (re.compile("(.*)"),
                 " \\1",
                 LineTypes.PLAIN,
                 States.NORMAL),
                ],
            States.MATH:[
                #END MATH
                (re.compile(r"(.*?)\</div\>\</html\>$"),
                 " \\1",
                 LineTypes.LATEX,
                 States.NORMAL),
                #CONTINUE MATH
                (re.compile("(.*)"),
                 " \\1",
                 LineTypes.LATEX,
                 States.MATH),
                ],
            States.TRACEBACK:[
                #END Traceback
                (re.compile(r"^(\S.*)"),
                 " ...\n \\1",
                 LineTypes.TRACE,
                 States.NORMAL),
                ],
            States.HTML:[
                #END HTML
                (re.compile(r".*</html\>$"),
                 "",
                 LineTypes.HTML,
                 States.NORMAL),
                ],
        }

    def parse(self, text):
        """Return the rst text for the cell output ``text``.

        Plain and html lines come first, in their original order; images,
        latex and traceback lines follow.
        """
        result_plain = []
        result_show = []
        state = States.NORMAL
        for line in text.splitlines():
            for regex, replacement, line_type, new_state in self.transitions[state]:
                if not regex.match(line):
                    continue
                if line_type in (LineTypes.PLAIN, LineTypes.HTML):
                    result = result_plain
                else:
                    result = result_show
                # count=1: only the match found above is rewritten.  Without
                # it, "(.*)" also matches the empty string at the end of the
                # line and the replacement is inserted a second time.
                result.append(regex.sub(replacement, line, count=1))
                state = new_state
                break
        result_plain.extend(result_show)
        return '\n'.join(result_plain)

def results2rst(text, images_dir):
    r"""Converts the result of evaluation of notebook cells
    into rst compatible with Sage documentation.

    Several common patterns are identified, and treated
    accordingly. Some patterns are dropped, while others
    are not recognized.

    Currently, latex and images are recognized and converted.

    INPUT:

    - ``text`` -- string -- a chunk of HTML text

    - ``images_dir`` -- string -- folder where images are stored

    OUTPUT:

    - string -- rst text

    EXAMPLES::

        >>> from sage_sws2rst.results2rst import results2rst
        >>> s="<html><font color='black'><img src='cell://sage0.png'></font></html>"
        >>> results2rst(s,'')
        '\n.. image:: sage0.png\n :align: center\n'
        >>> results2rst("4",'')
        ' 4'
        >>> s=r'<html><div class="math">\newcommand{\Bold}[1]{\mathbf{#1}}\frac{3}{2}</div></html>'
        >>> results2rst(s,'')
        '\n.. MATH::\n\n \\frac{3}{2}\n'
    """
    Parser = ResultsParser(images_dir)
    return Parser.parse(text)
168
+
169
+ if __name__ == "__main__":
170
+ import doctest
171
+ doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
@@ -0,0 +1,203 @@
1
+ #!/usr/bin/python
2
+ # -*- coding: utf-8 -*-
3
+ r"""
4
+ Convert worksheet.html files into ReStructuredText documents
5
+
6
+ This is called by 'sage --sws2rst'. Can also be used as a commandline script
7
+ (if BeautifulSoup is installed):
8
+
9
+ ``python worksheet2rst.py worksheet.html``
10
+
11
+ or
12
+
13
+ ``cat worksheet.html | python worksheet2rst.py``
14
+
15
+ AUTHOR:
16
+
17
+ - Pablo Angulo Ardoy (2011-02-25): initial version
18
+
19
+
20
+ The content of worksheet.html is split into comments, code, and output
21
+ (the result of evaluating the code), as follows:
22
+
23
+ comments
24
+ {{{id=..|
25
+ code
26
+ ///
27
+ results
28
+ }}}
29
+
30
+ Each kind of text is dealt with separately.
31
+ """
32
+
33
+ #**************************************************
34
+ # Copyright (C) 2011 Pablo Angulo
35
+ #
36
+ # Distributed under the terms of the GPL License
37
+ #**************************************************
38
+
39
+
40
+ import sys
41
+ import os
42
+ import re
43
+ from .comments2rst import html2rst
44
+ from .results2rst import results2rst
45
+ import codecs
46
+
47
+ #We parse lines one by one but keep track of current scope
48
+ #comments
49
+ #{{{id=..|
50
+ #code
51
+ #///
52
+ #results
53
+ #}}}
54
+ #RESULT_TO_BE_DROPPED corresponds to a results section whose
55
+ #code was empty, and will be discarded, whether it's empty or not
56
class States(object):
    """Scope the worksheet parser is in while reading lines."""
    COMMENT, CODE, RESULT, RESULT_TO_BE_DROPPED = range(4)

# Markers that separate comments, code and results
START_CELL_RE = re.compile(r'^\{\{\{id=(\d*)\|')
END_CODE_RE = re.compile(r'^\/\/\/')
END_CELL_RE = re.compile(r'^\}\}\}')

# For each state: the marker that ends it, and the state that follows
transitions = dict([
    (States.COMMENT, (START_CELL_RE, States.CODE)),
    (States.CODE, (END_CODE_RE, States.RESULT)),
    (States.RESULT, (END_CELL_RE, States.COMMENT)),
    (States.RESULT_TO_BE_DROPPED, (END_CELL_RE, States.COMMENT)),
])
83
+
84
def code_parser(text):
    r"""
    Convert the code of a cell into an rst literal block of doctest lines.

    INPUT:

    - ``text`` -- string -- sage code; each line may or may not start
      with "sage: "

    OUTPUT:

    - string -- rst text

    Empty lines are dropped. A line that starts with a blank or a tab is
    taken as the continuation of a compound statement and gets the
    ``....:`` prompt; every other line gets the ``sage:`` prompt.

    EXAMPLES (not used for unit test, see
    http://groups.google.com/group/sage-devel/browse_thread/thread/d82cb049ac102f3a)

    : from sage_sws2rst.worksheet2rst import code_parser
    : s="a=2"
    : code_parser(s)
    '::\n\n sage: a=2'
    : s="def f(n):\n return n+1\n"
    : code_parser(s)
    '::\n\n sage: def f(n):\n ....: return n+1'
    : s="sage: def f(n):\nsage: return n+1\n"
    : code_parser(s)
    '::\n\n sage: def f(n):\n ....: return n+1'
    """
    lines = ['::', '']
    for raw in text.splitlines():
        code = raw[6:] if raw.startswith('sage: ') else raw
        if not code:
            continue
        # Indentation may be written with tabs as well as with blanks.
        prefix = ' ....: ' if code[0] in ' \t' else ' sage: '
        lines.append(prefix + code)
    return '\n'.join(lines)
118
+
119
# Opening tag of an html header (h1 to h6), with or without attributes,
# in lower or upper case
HEADER_RE = re.compile(r'<h[1-6][\s>]', re.IGNORECASE)
def add_title_if_there_is_none(text):
    """Return ``text``, preceded by a placeholder ``<h1>`` title if it
    does not contain any html header.

    Headers that carry attributes, such as ``<h1 style="...">``, count
    as headers too, so no placeholder is added in front of them.
    """
    if HEADER_RE.search(text):
        return text
    return '<h1>Please write a title for this worksheet!</h1>\n' + text
125
+
126
def worksheet2rst(s, images_dir=''):
    r"""Parses a string, typically the content of the file
    worksheet.html inside a sws file, and converts it into
    rst compatible with Sage documentation.

    INPUT:

    - ``s`` -- string -- text, typically the content of
      worksheet.html

    - ``images_dir`` -- string -- folder where images are stored

    OUTPUT:

    - string -- rst text

    The text is read line by line. Lines are collected until the marker
    that ends the current scope (comment, code or result) is found; the
    collected lines are then converted according to that scope. The
    results of a cell whose code is empty are dropped.

    EXAMPLES (not used for unit test, see
    http://groups.google.com/group/sage-devel/browse_thread/thread/d82cb049ac102f3a)

    : from sage_sws2rst.worksheet2rst import worksheet2rst
    : worksheet2rst('<p>some text</p>\n{{{id=1|\nprint 2+2\n///\n4\n}}}')
    u'.. -*- coding: utf-8 -*-\n\nPlease write a title for this worksheet!\n========================================\n\nsome text\n\n\n::\n\n sage: print 2+2\n 4\n\n.. end of output\n'
    : s = '{{{id=2|\nshow(f)\n///\n<html><div class="math">\\sqrt{x}</div></html>\n}}}\n'
    : worksheet2rst(s)
    u'.. -*- coding: utf-8 -*-\n\nPlease write a title for this worksheet!\n========================================\n::\n\n sage: show(f)\n\n.. MATH::\n\n \\sqrt{x}\n\n.. end of output\n'
    """
    s = add_title_if_there_is_none(s)
    state = States.COMMENT
    result = ['.. -*- coding: utf-8 -*-\n']
    ls = []
    for line in s.splitlines():
        regex, next_state = transitions[state]
        m = regex.match(line)
        if not m:
            # Still inside the current scope: keep collecting.
            ls.append(line)
            continue
        if state == States.COMMENT:
            # The marker that opens a cell carries the cell id, which is
            # needed later to name the images of its results.
            last_cell_id = m.group(1)
            img_path = images_dir + os.path.sep
            result.append(html2rst('\n'.join(ls), img_path))
        elif state == States.RESULT:
            img_path = os.path.join(images_dir, 'cell_%s_' % last_cell_id)
            result.append(results2rst('\n'.join(ls),
                                      img_path))
            result.append('')
            result.append('.. end of output')
        elif state == States.CODE:
            if ls and any(ls):
                result.append(code_parser('\n'.join(ls)))
            else:
                next_state = States.RESULT_TO_BE_DROPPED
        ls = []
        state = next_state

    # The text ended without the closing marker of the current scope:
    # convert whatever was collected for it.
    if state == States.COMMENT:
        img_path = images_dir + os.path.sep
        result.append(html2rst('\n'.join(ls), img_path))
    elif state == States.RESULT:
        img_path = os.path.join(images_dir, 'cell_%s_' % last_cell_id)
        # Same converter as for a properly closed results section.
        result.append(results2rst('\n'.join(ls),
                                  img_path))
        result.append('')
        result.append('.. end of output')
    elif state == States.CODE:
        result.append(code_parser('\n'.join(ls)))

    return '\n'.join(result)
192
+
193
if __name__=='__main__':
    # Usage: worksheet2rst.py [worksheet.html [images_dir]]
    # Without a file argument the html is read from standard input.
    if len(sys.argv)>1:
        # ``with`` closes the file even if reading or decoding fails.
        with codecs.open(sys.argv[1], mode='r', encoding='utf-8') as fichero:
            text = fichero.read()
    else:
        text = sys.stdin.read()
    images_dir = sys.argv[2] if len(sys.argv)>2 else ''

    rst = worksheet2rst(text, images_dir)
    # print() on a bytes object writes its repr (b'...'), not the text.
    # Write the UTF-8 bytes to the binary layer of stdout when there is one.
    binary_stdout = getattr(sys.stdout, 'buffer', None)
    if binary_stdout is not None:
        binary_stdout.write(rst.encode('utf-8') + b'\n')
    else:
        print(rst)
203
+