summarize 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +11 -0
- data/README.markdown +42 -0
- data/Rakefile +49 -0
- data/ext/summarize/article.c +119 -0
- data/ext/summarize/dic/bg.xml +101 -0
- data/ext/summarize/dic/ca.xml +141 -0
- data/ext/summarize/dic/cs.xml +161 -0
- data/ext/summarize/dic/cy.xml +118 -0
- data/ext/summarize/dic/da.xml +129 -0
- data/ext/summarize/dic/de.xml +354 -0
- data/ext/summarize/dic/el.xml +80 -0
- data/ext/summarize/dic/en.xml +606 -0
- data/ext/summarize/dic/eo.xml +171 -0
- data/ext/summarize/dic/es.xml +369 -0
- data/ext/summarize/dic/et.xml +172 -0
- data/ext/summarize/dic/eu.xml +77 -0
- data/ext/summarize/dic/fi.xml +105 -0
- data/ext/summarize/dic/fr.xml +199 -0
- data/ext/summarize/dic/ga.xml +124 -0
- data/ext/summarize/dic/gl.xml +290 -0
- data/ext/summarize/dic/he.xml +334 -0
- data/ext/summarize/dic/hu.xml +280 -0
- data/ext/summarize/dic/ia.xml +97 -0
- data/ext/summarize/dic/id.xml +75 -0
- data/ext/summarize/dic/is.xml +201 -0
- data/ext/summarize/dic/it.xml +206 -0
- data/ext/summarize/dic/lv.xml +77 -0
- data/ext/summarize/dic/mi.xml +76 -0
- data/ext/summarize/dic/ms.xml +160 -0
- data/ext/summarize/dic/mt.xml +73 -0
- data/ext/summarize/dic/nl.xml +245 -0
- data/ext/summarize/dic/nn.xml +264 -0
- data/ext/summarize/dic/pl.xml +92 -0
- data/ext/summarize/dic/pt.xml +365 -0
- data/ext/summarize/dic/ro.xml +163 -0
- data/ext/summarize/dic/ru.xml +150 -0
- data/ext/summarize/dic/sv.xml +255 -0
- data/ext/summarize/dic/tl.xml +67 -0
- data/ext/summarize/dic/tr.xml +65 -0
- data/ext/summarize/dic/uk.xml +98 -0
- data/ext/summarize/dic/yi.xml +293 -0
- data/ext/summarize/dictionary.c +331 -0
- data/ext/summarize/extconf.rb +6 -0
- data/ext/summarize/grader-tc.c +185 -0
- data/ext/summarize/grader-tc.h +64 -0
- data/ext/summarize/grader-tf.c +116 -0
- data/ext/summarize/grader.c +85 -0
- data/ext/summarize/highlighter.c +128 -0
- data/ext/summarize/html.c +131 -0
- data/ext/summarize/libots.h +158 -0
- data/ext/summarize/parser.c +173 -0
- data/ext/summarize/relations.c +163 -0
- data/ext/summarize/stemmer.c +332 -0
- data/ext/summarize/summarize.c +43 -0
- data/ext/summarize/summarize.h +12 -0
- data/ext/summarize/text.c +98 -0
- data/ext/summarize/wordlist.c +220 -0
- data/lib/summarize.rb +91 -0
- data/lib/summarize/summarize.bundle +0 -0
- data/sample_data/jupiter.txt +15 -0
- data/summarize.gemspec +21 -0
- metadata +140 -0
@@ -0,0 +1,64 @@
|
|
1
|
+
/*
|
2
|
+
* grader-tc.h
|
3
|
+
*
|
4
|
+
* Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
|
5
|
+
*
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
9
|
+
* (at your option) any later version.
|
10
|
+
*
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
* GNU Library General Public License for more details.
|
15
|
+
*
|
16
|
+
* You should have received a copy of the GNU General Public License
|
17
|
+
* along with this program; if not, write to the Free Software
|
18
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#ifndef HAVE_GRADERTC_H
|
22
|
+
#define HAVE_GRADERTC_H
|
23
|
+
|
24
|
+
|
25
|
+
#include <glib.h>
|
26
|
+
#include "libots.h"
|
27
|
+
|
28
|
+
G_BEGIN_DECLS
|
29
|
+
|
30
|
+
|
31
|
+
typedef struct
|
32
|
+
{
|
33
|
+
gchar *word; /* the word */
|
34
|
+
gchar *stem; /*stem of the word*/
|
35
|
+
gint occ; /* how many times have we seen this word in the text? */
|
36
|
+
} OtsWordEntery;
|
37
|
+
|
38
|
+
/*Word list manipulations*/
|
39
|
+
void ots_free_wordlist (GList *aList);
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
OtsWordEntery *ots_copy_wordEntery (OtsWordEntery * obj);
|
44
|
+
OtsWordEntery *ots_new_wordEntery (unsigned const char *wordString);
|
45
|
+
OtsWordEntery *ots_new_wordEntery_strip (unsigned const char *wordString,const OtsStemRule *rule);
|
46
|
+
void ots_free_wordEntery (OtsWordEntery * WC);
|
47
|
+
|
48
|
+
GList *ots_sort_list (GList* aList);
|
49
|
+
GList *ots_union_list (const GList *aLst, const GList * bLst);
|
50
|
+
|
51
|
+
char *ots_word_in_list (const GList *aList,const int index);
|
52
|
+
char *ots_stem_in_list (const GList *aList,const int index);
|
53
|
+
void ots_add_wordstat (OtsArticle * Doc,unsigned const char *wordString);
|
54
|
+
|
55
|
+
|
56
|
+
/*grader*/
|
57
|
+
|
58
|
+
void ots_grade_doc_tc (OtsArticle * Doc);
|
59
|
+
|
60
|
+
G_END_DECLS
|
61
|
+
|
62
|
+
|
63
|
+
|
64
|
+
#endif /* HAVE_GRADERTC_H */
|
@@ -0,0 +1,116 @@
|
|
1
|
+
/*
|
2
|
+
* grader-tf.c
|
3
|
+
*
|
4
|
+
* Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
|
5
|
+
*
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
9
|
+
* (at your option) any later version.
|
10
|
+
*
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
* GNU Library General Public License for more details.
|
15
|
+
*
|
16
|
+
* You should have received a copy of the GNU General Public License
|
17
|
+
* along with this program; if not, write to the Free Software
|
18
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libots.h"
|
25
|
+
|
26
|
+
/*Grader - using the Term frequency algorithm. Will give each line a score*/
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
OtsWordTF*
|
31
|
+
ots_new_OtsWordTF(const char* word,const double tf)
|
32
|
+
{
|
33
|
+
OtsWordTF* obj=g_new0(OtsWordTF,1);
|
34
|
+
if (word!=NULL) obj->word=g_strdup(word);
|
35
|
+
obj->tf=tf;
|
36
|
+
return obj;
|
37
|
+
}
|
38
|
+
|
39
|
+
void
|
40
|
+
ots_free_OtsWordTF(OtsWordTF *obj)
|
41
|
+
{
|
42
|
+
if (obj!=NULL)
|
43
|
+
{
|
44
|
+
if (obj->word!=NULL) g_free(obj->word);
|
45
|
+
g_free(obj);
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
void
|
50
|
+
ots_free_TF_wordlist (GList * aList)
|
51
|
+
{
|
52
|
+
if (aList != NULL)
|
53
|
+
{
|
54
|
+
g_list_foreach(aList,(GFunc)ots_free_OtsWordTF, NULL);
|
55
|
+
g_list_free(aList);
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
|
60
|
+
void
|
61
|
+
ots_grade_line_tf (OtsSentence * aLine)
|
62
|
+
{
|
63
|
+
|
64
|
+
return;
|
65
|
+
}
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
void
|
70
|
+
ots_grade_doc_tf (OtsArticle * Doc)
|
71
|
+
{
|
72
|
+
|
73
|
+
GList *li;
|
74
|
+
|
75
|
+
/*Load tf list*/
|
76
|
+
/*Load idf list*/
|
77
|
+
|
78
|
+
if (0 == Doc->lineCount) return;
|
79
|
+
|
80
|
+
for (li = (GList *) Doc->lines; li != NULL; li = li->next)
|
81
|
+
{
|
82
|
+
ots_grade_line_tf ((OtsSentence *) li->data /* , tf list , idf list*/);
|
83
|
+
}
|
84
|
+
|
85
|
+
return;
|
86
|
+
}
|
87
|
+
|
88
|
+
|
89
|
+
/* Combine a term-frequency value and an inverse-document-frequency value
 * into a single word score.
 *
 * tf:  how often the word occurs in the document.
 * idf: how rare the word is across the collection.
 *
 * Returns the product of the two.
 */
double
ots_tf_word_score (const double tf, const double idf)
{
  const double score = tf * idf;

  return score;
}
|
97
|
+
|
98
|
+
/*
|
99
|
+
Determine frequency of query words
|
100
|
+
n = (num-of-sentences words appears in)
|
101
|
+
N = (total-number-of-sentences)
|
102
|
+
f = n/N
|
103
|
+
*/
|
104
|
+
|
105
|
+
double
|
106
|
+
ots_calc_idf (const int term_count,const int doc_word_count)
|
107
|
+
{
|
108
|
+
return -log(doc_word_count/term_count);
|
109
|
+
}
|
110
|
+
|
111
|
+
/* Compute 0.5 + 0.5 * (doc_word_count / term_count) using floating-point
 * division.
 *
 * term_count:     count for the term; 0 yields a result of 0.
 * doc_word_count: count for the whole document.
 *
 * The ratio used to be computed with integer division, which discarded
 * its fractional part before scaling.
 */
double
ots_calc_tf (const int term_count, const int doc_word_count)
{
  if (0 == term_count)
    return 0.0;

  return 0.5 + 0.5 * ((double) doc_word_count / (double) term_count);
}
|
@@ -0,0 +1,85 @@
|
|
1
|
+
/*
|
2
|
+
* grader.c
|
3
|
+
*
|
4
|
+
* Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
|
5
|
+
*
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
9
|
+
* (at your option) any later version.
|
10
|
+
*
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
* GNU Library General Public License for more details.
|
15
|
+
*
|
16
|
+
* You should have received a copy of the GNU General Public License
|
17
|
+
* along with this program; if not, write to the Free Software
|
18
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#include <stdio.h>
|
22
|
+
#include <stdlib.h>
|
23
|
+
#include <string.h>
|
24
|
+
#include "libots.h"
|
25
|
+
|
26
|
+
extern void ots_grade_doc_tc (OtsArticle * Doc);
|
27
|
+
|
28
|
+
/*Grader driver - will call one of the grading algorithm*/
|
29
|
+
|
30
|
+
|
31
|
+
|
32
|
+
void
|
33
|
+
ots_grade_structure (OtsArticle * Doc) /*must be called after the first grader*/
|
34
|
+
{
|
35
|
+
GList *li;
|
36
|
+
GList *first;
|
37
|
+
GList *second;
|
38
|
+
OtsSentence *first_line=NULL;
|
39
|
+
|
40
|
+
first = NULL;
|
41
|
+
second = NULL;
|
42
|
+
|
43
|
+
if (Doc==NULL) return;
|
44
|
+
|
45
|
+
if (Doc->lines!=NULL)
|
46
|
+
first_line= ((OtsSentence *) (Doc->lines->data));
|
47
|
+
if (NULL!=first_line) first_line->score *= 2; /*first line/title is very important so we increase its score */
|
48
|
+
|
49
|
+
/*This loop will *1.6 the score of each line that
|
50
|
+
starts with \n \n , in other words a new paragraph*/
|
51
|
+
|
52
|
+
for (li = (GList *) Doc->lines; li != NULL; li = li->next)
|
53
|
+
{
|
54
|
+
OtsSentence *aLine = (li->data);
|
55
|
+
if (NULL != aLine) /*line is there */
|
56
|
+
{
|
57
|
+
first = aLine->words; /*first word? */
|
58
|
+
if (NULL != first)
|
59
|
+
second = first->next; /*second word? */
|
60
|
+
if ((NULL != first) && (NULL != second)) /*have content? */
|
61
|
+
if (strcmp (first->data, "\n") && strcmp (second->data, "\n")) /*new paragraph? */
|
62
|
+
aLine->score *= 1.6;
|
63
|
+
}
|
64
|
+
|
65
|
+
}
|
66
|
+
|
67
|
+
}
|
68
|
+
|
69
|
+
/**
|
70
|
+
Each grader needs to do:
|
71
|
+
1.give a ->score to each line
|
72
|
+
2.Set the ->title of the document
|
73
|
+
**/
|
74
|
+
|
75
|
+
void
|
76
|
+
ots_grade_doc (OtsArticle * Doc)
|
77
|
+
{
|
78
|
+
|
79
|
+
if (Doc==NULL) return;
|
80
|
+
ots_grade_doc_tc(Doc); /*Term count*/
|
81
|
+
|
82
|
+
/* or ots_grade_doc_fc (Doc); Term Frequency */
|
83
|
+
|
84
|
+
ots_grade_structure (Doc);
|
85
|
+
}
|
@@ -0,0 +1,128 @@
|
|
1
|
+
/*
|
2
|
+
* highlighter
|
3
|
+
*
|
4
|
+
* Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
|
5
|
+
*
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
9
|
+
* (at your option) any later version.
|
10
|
+
*
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
* GNU Library General Public License for more details.
|
15
|
+
*
|
16
|
+
* You should have received a copy of the GNU General Public License
|
17
|
+
* along with this program; if not, write to the Free Software
|
18
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#include <stdio.h>
|
22
|
+
#include <stdlib.h>
|
23
|
+
#include <string.h>
|
24
|
+
#include "libots.h"
|
25
|
+
|
26
|
+
/*After the grader has graded the article and each
|
27
|
+
sentence has a score the highlighter will select
|
28
|
+
some of the sentences*/
|
29
|
+
|
30
|
+
static int
|
31
|
+
ots_highlight_max_line (OtsArticle * Doc)
|
32
|
+
{
|
33
|
+
GList *li;
|
34
|
+
int max = 0;
|
35
|
+
for (li = (GList *) Doc->lines; li != NULL; li = li->next)
|
36
|
+
{
|
37
|
+
if (0 == (((OtsSentence *) li->data)->selected)) /* if not selected , count me in */
|
38
|
+
max = MAX (((OtsSentence *) li->data)->score, max);
|
39
|
+
|
40
|
+
}
|
41
|
+
|
42
|
+
for (li = (GList *) Doc->lines; li != NULL; li = li->next)
|
43
|
+
{
|
44
|
+
|
45
|
+
if ((((OtsSentence *) li->data)->score == max) && (((OtsSentence *) li->data)->selected == 0)) /* if score==max && not selected before ,select me; */
|
46
|
+
{
|
47
|
+
((OtsSentence *) li->data)->selected = 1;
|
48
|
+
return ((OtsSentence *) li->data)->wc;
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
return 0;
|
53
|
+
}
|
54
|
+
|
55
|
+
|
56
|
+
/* todo: implement this
|
57
|
+
|
58
|
+
void
|
59
|
+
ots_highlight_doc_wordcount (OtsArticle * Doc, int wordCount)
|
60
|
+
|
61
|
+
void
|
62
|
+
ots_highlight_doc_linecount (OtsArticle * Doc, int wordCount)
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
void
|
67
|
+
ots_highlight_doc_soft (OtsArticle * Doc, int percent) //blur selection by average of nearby sentences, will mark blocks
|
68
|
+
*/
|
69
|
+
|
70
|
+
void
|
71
|
+
ots_highlight_doc (OtsArticle * Doc, int percent)
|
72
|
+
{
|
73
|
+
int i;
|
74
|
+
double ratio;
|
75
|
+
int wordCount;
|
76
|
+
|
77
|
+
if (0 == Doc->lineCount)
|
78
|
+
return;
|
79
|
+
|
80
|
+
if (percent > 100)
|
81
|
+
percent = 100;
|
82
|
+
else if (percent < 0)
|
83
|
+
percent = 0;
|
84
|
+
|
85
|
+
ratio = ((double) (percent)) / (100.0);
|
86
|
+
|
87
|
+
wordCount = ots_get_article_word_count (Doc);
|
88
|
+
|
89
|
+
for (i = 0; i < (ratio * (double) wordCount);)
|
90
|
+
{
|
91
|
+
i += ots_highlight_max_line (Doc);
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
95
|
+
void
|
96
|
+
ots_highlight_doc_lines (OtsArticle * Doc, int lines)
|
97
|
+
{
|
98
|
+
int i;
|
99
|
+
int lineCount;
|
100
|
+
int tmp;
|
101
|
+
|
102
|
+
if (0 == Doc->lineCount) return;
|
103
|
+
|
104
|
+
lineCount = Doc->lineCount;
|
105
|
+
i=0;
|
106
|
+
while ((i<lines)&&(i<lineCount))
|
107
|
+
{
|
108
|
+
i++;
|
109
|
+
tmp=ots_highlight_max_line (Doc);
|
110
|
+
}
|
111
|
+
|
112
|
+
}
|
113
|
+
|
114
|
+
/* Select the best sentences until the selected word count exceeds
 * `words` or reaches the article's word count.
 *
 * Doc:   article whose sentences are already graded; NULL or an article
 *        with lineCount 0 is ignored.
 * words: number of words requested.
 *
 * Every call to ots_highlight_max_line() selects at most one sentence, so
 * the number of calls is capped at the number of sentences; this keeps
 * the loop from spinning forever when the helper keeps returning 0.
 *
 * NOTE(review): the `<=` comparison means one more sentence is selected
 * when exactly `words` words have been picked; kept as in the original.
 */
void ots_highlight_doc_words (OtsArticle * Doc, int words)
{
  int i;
  int docWordCount;
  guint remaining;

  if (NULL == Doc || 0 == Doc->lineCount)
    return;

  docWordCount = ots_get_article_word_count (Doc);
  remaining = g_list_length ((GList *) Doc->lines);

  i = 0;
  while ((i < docWordCount) && (i <= words) && (remaining > 0))
    {
      i += ots_highlight_max_line (Doc);
      remaining--;
    }
}
|
@@ -0,0 +1,131 @@
|
|
1
|
+
/*
|
2
|
+
* html.c
|
3
|
+
*
|
4
|
+
* Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
|
5
|
+
*
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
9
|
+
* (at your option) any later version.
|
10
|
+
*
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
* GNU Library General Public License for more details.
|
15
|
+
*
|
16
|
+
* You should have received a copy of the GNU General Public License
|
17
|
+
* along with this program; if not, write to the Free Software
|
18
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#include <stdio.h>
|
22
|
+
#include <stdlib.h>
|
23
|
+
#include <string.h>
|
24
|
+
#include "libots.h"
|
25
|
+
|
26
|
+
static unsigned char *
|
27
|
+
ots_get_line_HTML (const OtsSentence * aLine, size_t * out_size)
|
28
|
+
{
|
29
|
+
GList *li;
|
30
|
+
GString *text;
|
31
|
+
unsigned char *utf8_data;
|
32
|
+
char *score_str;
|
33
|
+
text = g_string_new (NULL);
|
34
|
+
|
35
|
+
score_str=g_new0(char,32);
|
36
|
+
sprintf(score_str,"<!--(%ld)-->",aLine->score);
|
37
|
+
g_string_append (text,score_str);
|
38
|
+
g_free(score_str);
|
39
|
+
|
40
|
+
if ((aLine->selected))
|
41
|
+
{
|
42
|
+
g_string_append (text,
|
43
|
+
"<FONT COLOR=\"#16569E\"><span style=\'background:yellow;\'>");
|
44
|
+
}
|
45
|
+
else
|
46
|
+
{
|
47
|
+
g_string_append (text, "<FONT COLOR=\"#16569E\"><span>");
|
48
|
+
}
|
49
|
+
|
50
|
+
for (li = (GList *) aLine->words; li != NULL; li = li->next)
|
51
|
+
{
|
52
|
+
if (0 == strcmp ((char *) li->data, "\n"))
|
53
|
+
g_string_append (text, "<br>");
|
54
|
+
else
|
55
|
+
g_string_append (text, (char *) li->data);
|
56
|
+
}
|
57
|
+
g_string_append (text,"</span></FONT>\n");
|
58
|
+
|
59
|
+
if (out_size)
|
60
|
+
*out_size = text->len;
|
61
|
+
|
62
|
+
utf8_data = text->str;
|
63
|
+
g_string_free (text, FALSE);
|
64
|
+
|
65
|
+
return utf8_data;
|
66
|
+
}
|
67
|
+
|
68
|
+
|
69
|
+
#if 0
/* Disabled helper: write the HTML fragment of one sentence to `stream`. */
static void
ots_print_line_HTML (FILE * stream, const OtsSentence * aLine)
{
  size_t n_bytes;
  unsigned char *markup = ots_get_line_HTML (aLine, &n_bytes);

  fwrite (markup, 1, n_bytes, stream);
  g_free (markup);
}
#endif
|
81
|
+
|
82
|
+
|
83
|
+
unsigned char *
|
84
|
+
ots_get_doc_HTML (const OtsArticle * Doc, size_t * out_len)
|
85
|
+
{
|
86
|
+
GList *li;
|
87
|
+
GString *text;
|
88
|
+
unsigned char *utf8_data;
|
89
|
+
size_t line_len;
|
90
|
+
|
91
|
+
text = g_string_new (NULL);
|
92
|
+
|
93
|
+
|
94
|
+
g_string_append (text,
|
95
|
+
"<html>\n<head>\n<title>OTS</title>\n<meta charset=\"utf-8\">\n</head>\n<body>\n");
|
96
|
+
g_string_append (text, "<!-- Generated by OpenTextSummarizer -->\n");
|
97
|
+
g_string_append (text, "<!--");
|
98
|
+
g_string_append (text, Doc->title);
|
99
|
+
g_string_append (text, "-->\n");
|
100
|
+
|
101
|
+
|
102
|
+
for (li = (GList *) Doc->lines; li != NULL; li = li->next)
|
103
|
+
{
|
104
|
+
utf8_data = ots_get_line_HTML ((OtsSentence *) li->data, &line_len);
|
105
|
+
g_string_append_len (text, utf8_data, line_len);
|
106
|
+
g_free (utf8_data);
|
107
|
+
}
|
108
|
+
g_string_append (text, "</body></html>\n");
|
109
|
+
|
110
|
+
if (out_len)
|
111
|
+
*out_len = text->len;
|
112
|
+
utf8_data = text->str;
|
113
|
+
|
114
|
+
g_string_free (text, FALSE);
|
115
|
+
return utf8_data;
|
116
|
+
|
117
|
+
}
|
118
|
+
|
119
|
+
|
120
|
+
|
121
|
+
void
|
122
|
+
ots_print_HTML (FILE * stream, const OtsArticle * Doc)
|
123
|
+
{
|
124
|
+
unsigned char *utf8_txt;
|
125
|
+
size_t len;
|
126
|
+
|
127
|
+
utf8_txt = ots_get_doc_HTML (Doc, &len);
|
128
|
+
fwrite (utf8_txt, 1, len, stream);
|
129
|
+
g_free (utf8_txt);
|
130
|
+
|
131
|
+
}
|